Diff 232884

llvm/include/llvm/CodeGen/TargetLowering.h

Show First 20 Lines • Show All 1,246 Lines • ▼ Show 20 Lines	if (PointerType *PTy = dyn_cast<PointerType>(Ty))
return getPointerMemTy(DL, PTy->getAddressSpace());		return getPointerMemTy(DL, PTy->getAddressSpace());
else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {		else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
Type *Elm = VTy->getElementType();		Type *Elm = VTy->getElementType();
if (PointerType *PT = dyn_cast<PointerType>(Elm)) {		if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));		EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
Elm = PointerTy.getTypeForEVT(Ty->getContext());		Elm = PointerTy.getTypeForEVT(Ty->getContext());
}		}
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),		return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
VTy->getNumElements());		VTy->getElementCount());
		efriedmaUnsubmitted Done Reply Inline Actions While you're here, indentation? efriedma: While you're here, indentation?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Good spot! sdesmalen: Good spot!
}		}

return getValueType(DL, Ty, AllowUnknown);		return getValueType(DL, Ty, AllowUnknown);
}		}


/// Return the MVT corresponding to this LLVM type. See getValueType.		/// Return the MVT corresponding to this LLVM type. See getValueType.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty,		MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
▲ Show 20 Lines • Show All 3,058 Lines • Show Last 20 Lines

llvm/lib/Analysis/Loads.cpp

Show First 20 Lines • Show All 134 Lines • ▼ Show 20 Lines	return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT,
Visited);		Visited);
}		}

bool llvm::isDereferenceableAndAlignedPointer(const Value V, Type Ty,		bool llvm::isDereferenceableAndAlignedPointer(const Value V, Type Ty,
MaybeAlign MA,		MaybeAlign MA,
const DataLayout &DL,		const DataLayout &DL,
const Instruction *CtxI,		const Instruction *CtxI,
const DominatorTree *DT) {		const DominatorTree *DT) {
if (!Ty->isSized())		if (!Ty->isSized() \|\| (Ty->isVectorTy() && Ty->getVectorIsScalable()))
return false;		return false;
		efriedmaUnsubmitted Done Reply Inline Actions "how many bytes are dereferenced". efriedma: "how many bytes are dereferenced".

// When dereferenceability information is provided by a dereferenceable		// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can		// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that		// determine the exact offset to the attributed variable, we can use that
// information here.		// information here.

// Require ABI alignment for loads without alignment specification		// Require ABI alignment for loads without alignment specification
const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);		const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
▲ Show 20 Lines • Show All 328 Lines • Show Last 20 Lines

llvm/lib/CodeGen/CodeGenPrepare.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 6,764 Lines • ▼ Show 20 Lines
	/// Although we already have similar splitting in DAG Combine, we duplicate			/// Although we already have similar splitting in DAG Combine, we duplicate
	/// it in CodeGenPrepare to catch the case in which pattern is across			/// it in CodeGenPrepare to catch the case in which pattern is across
	/// multiple BBs. The logic in DAG Combine is kept to catch case generated			/// multiple BBs. The logic in DAG Combine is kept to catch case generated
	/// during code expansion.			/// during code expansion.
	static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,			static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
	const TargetLowering &TLI) {			const TargetLowering &TLI) {
	// Handle simple but common cases only.			// Handle simple but common cases only.
	Type *StoreType = SI.getValueOperand()->getType();			Type *StoreType = SI.getValueOperand()->getType();
				if (StoreType->isVectorTy() && StoreType->getVectorIsScalable())
				return false;

	if (!DL.typeSizeEqualsStoreSize(StoreType) \|\|			if (!DL.typeSizeEqualsStoreSize(StoreType) \|\|
	DL.getTypeSizeInBits(StoreType) == 0)			DL.getTypeSizeInBits(StoreType) == 0)
	return false;			return false;

	unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;			unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
	Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);			Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
	if (!DL.typeSizeEqualsStoreSize(SplitStoreType))			if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
	return false;			return false;
	▲ Show 20 Lines • Show All 659 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 15,686 Lines • ▼ Show 20 Lines	bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;		return true;
}		}

bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {		bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None \|\| !EnableStoreMerging)		if (OptLevel == CodeGenOpt::None \|\| !EnableStoreMerging)
return false;		return false;

EVT MemVT = St->getMemoryVT();		EVT MemVT = St->getMemoryVT();
		if (MemVT.isScalableVector())
		efriedmaUnsubmitted Not Done Reply Inline Actions This comment doesn't seem quite right. We could theoretically merge two stores if they're both scalable. For example, two `<vscale x 8 x i8>` stores can be merged to one `<vscale x 16 x i8>` store; we know `<vscale x 16 x i8>` is exactly twice as large as `<vscale x 8 x i8>`. You'd need extra logic for that, though, so I'm not suggesting changing the code. efriedma: This comment doesn't seem quite right. We could theoretically merge two stores if they're both…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions You're right, I will change the comment! sdesmalen: You're right, I will change the comment!
		return false;

int64_t ElementSizeBytes = MemVT.getStoreSize();		int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;		unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)		if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;		return false;

bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(		bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);		Attribute::NoImplicitFloat);
▲ Show 20 Lines • Show All 5,034 Lines • ▼ Show 20 Lines	auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {		if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
int64_t Offset = 0;		int64_t Offset = 0;
if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))		if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
Offset = (LSN->getAddressingMode() == ISD::PRE_INC)		Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
? C->getSExtValue()		? C->getSExtValue()
: (LSN->getAddressingMode() == ISD::PRE_DEC)		: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()		? -1 * C->getSExtValue()
: 0;		: 0;
		uint64_t Size = LSN->getMemoryVT().isScalableVector()
		? MemoryLocation::UnknownSize
		: LSN->getMemoryVT().getStoreSize();
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),		return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /base offset/,		Offset /base offset/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),		Optional<int64_t>(Size),
LSN->getMemOperand()};		LSN->getMemOperand()};
}		}
if (const auto *LN = cast<LifetimeSDNode>(N))		if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /isVolatile/, /isAtomic/ false, LN->getOperand(1),		return {false /isVolatile/, /isAtomic/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,		(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())		(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),		: Optional<int64_t>(),
(MachineMemOperand *)nullptr};		(MachineMemOperand *)nullptr};
▲ Show 20 Lines • Show All 263 Lines • ▼ Show 20 Lines	bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// We must have a base and an offset.		// We must have a base and an offset.
if (!BasePtr.getBase().getNode())		if (!BasePtr.getBase().getNode())
return false;		return false;

// Do not handle stores to undef base pointers.		// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())		if (BasePtr.getBase().isUndef())
return false;		return false;

		if (St->getMemoryVT().isScalableVector())
		return false;

// Add ST's interval.		// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);		Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {		while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.		// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())		if (!SDValue(Chain, 0)->hasOneUse())
break;		break;
// TODO: Relax for unordered atomics (see D66309)		// TODO: Relax for unordered atomics (see D66309)
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 19 Lines
#include "llvm/ADT/FoldingSet.h"		#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"		#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"		#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"		#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"		#include "llvm/Analysis/BlockFrequencyInfo.h"
		#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"		#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"		#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"		#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"		#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"		#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"		#include "llvm/CodeGen/MachineMemOperand.h"
▲ Show 20 Lines • Show All 6,681 Lines • ▼ Show 20 Lines	SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,

MMOFlags \|= MachineMemOperand::MOLoad;		MMOFlags \|= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);		assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify		// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.		// clients.
if (PtrInfo.V.isNull())		if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);		PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

		uint64_t Size = MemVT.isScalableVector() ? MemoryLocation::UnknownSize
		: MemVT.getStoreSize();
MachineFunction &MF = getMachineFunction();		MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(		MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);		PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);		return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}		}

SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,		SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,		EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,		SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {		MachineMemOperand *MMO) {
if (VT == MemVT) {		if (VT == MemVT) {
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines	SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,

MMOFlags \|= MachineMemOperand::MOStore;		MMOFlags \|= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);		assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

if (PtrInfo.V.isNull())		if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);		PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

MachineFunction &MF = getMachineFunction();		MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(		EVT MemVT = Val.getValueType();
PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);		uint64_t Size = MemVT.isScalableVector() ? MemoryLocation::UnknownSize
		: MemVT.getStoreSize();
		efriedmaUnsubmitted Done Reply Inline Actions Should we have a helper for this pattern? efriedma: Should we have a helper for this pattern?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Yes, that would be useful. I've added `MemoryLocation::getSizeOrUnknown(const TypeSize &)` sdesmalen: Yes, that would be useful. I've added `MemoryLocation::getSizeOrUnknown(const TypeSize &)`
		MachineMemOperand *MMO =
		MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);		return getStore(Chain, dl, Val, Ptr, MMO);
}		}

SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,		SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {		SDValue Ptr, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other &&		assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");		"Invalid chain type");
EVT VT = Val.getValueType();		EVT VT = Val.getValueType();
▲ Show 20 Lines • Show All 2,839 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Show First 20 Lines • Show All 178 Lines • ▼ Show 20 Lines	public:

void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,		void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
unsigned SubRegIdx);		unsigned SubRegIdx);
void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,		void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
unsigned SubRegIdx);		unsigned SubRegIdx);
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

		bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);

void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

bool tryBitfieldExtractOp(SDNode *N);		bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);		bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);		bool tryBitfieldInsertOp(SDNode *N);
▲ Show 20 Lines • Show All 1,114 Lines • ▼ Show 20 Lines	void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,

// Transfer memoperands.		// Transfer memoperands.
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();		MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});		CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

ReplaceNode(N, St);		ReplaceNode(N, St);
}		}

		bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
		SDValue &OffImm) {
		SDLoc dl(N);

		// If this is not a frame index, load directly from this address
		if (N->getOpcode() != ISD::FrameIndex) {
		Base = N;
		OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
		efriedmaUnsubmitted Done Reply Inline Actions This is sort of weird for a method named "SelectAddrModeFrameIndexSVE"; should it not just fail? efriedma: This is sort of weird for a method named "SelectAddrModeFrameIndexSVE"; should it not just fail?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Agreed, that should not have been there. Fixed. sdesmalen: Agreed, that should not have been there. Fixed.
		efriedmaUnsubmitted Done Reply Inline Actions I'm not sure how you're proving that "N" is a FrameIndexSDNode here? efriedma: I'm not sure how you're proving that "N" is a FrameIndexSDNode here?
		return true;
		}

		// Otherwise, match it for the frame address
		const DataLayout &DL = CurDAG->getDataLayout();
		const TargetLowering *TLI = getTargetLowering();
		int FI = cast<FrameIndexSDNode>(N)->getIndex();
		Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
		OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
		return true;
		}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,		void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {		unsigned Opc) {
SDLoc dl(N);		SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);		EVT VT = N->getOperand(2)->getValueType(0);
const EVT ResTys[] = {MVT::i64, // Type of the write back register		const EVT ResTys[] = {MVT::i64, // Type of the write back register
MVT::Other}; // Type for the Chain		MVT::Other}; // Type for the Chain

// Form a REG_SEQUENCE to force register allocation.		// Form a REG_SEQUENCE to force register allocation.
▲ Show 20 Lines • Show All 2,868 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,169 Lines • ▼ Show 20 Lines	bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
// No global is ever allowed as a base.		// No global is ever allowed as a base.
if (AM.BaseGV)		if (AM.BaseGV)
return false;		return false;

// No reg+reg+imm addressing.		// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)		if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;		return false;

		// FIXME: Update this method to support scalable addressing modes.
		if (Ty->isVectorTy() && Ty->getVectorIsScalable())
		return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;

// check reg + imm case:		// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12		// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;		uint64_t NumBytes = 0;
if (Ty->isSized()) {		if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);		uint64_t NumBits = DL.getTypeSizeInBits(Ty).getKnownMinSize();
		efriedmaUnsubmitted Done Reply Inline Actions Is this necessary? efriedma: Is this necessary?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions No, good catch! sdesmalen: No, good catch!
NumBytes = NumBits / 8;		NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))		if (!isPowerOf2_64(NumBits))
NumBytes = 0;		NumBytes = 0;
}		}

if (!AM.Scale) {		if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;		int64_t Offset = AM.BaseOffs;

▲ Show 20 Lines • Show All 3,411 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 343 Lines • ▼ Show 20 Lines	def simm7s8 : Operand<i32> {
let PrintMethod = "printImmScale<8>";		let PrintMethod = "printImmScale<8>";
}		}

def simm7s16 : Operand<i32> {		def simm7s16 : Operand<i32> {
let ParserMatchClass = SImm7s16Operand;		let ParserMatchClass = SImm7s16Operand;
let PrintMethod = "printImmScale<16>";		let PrintMethod = "printImmScale<16>";
}		}

		def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;

def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;		def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;		def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;		def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;		def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;		def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;

def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;		def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;		def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
▲ Show 20 Lines • Show All 10,418 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Show First 20 Lines • Show All 1,677 Lines • ▼ Show 20 Lines	default:
break;		break;
case AArch64::STRWui:		case AArch64::STRWui:
case AArch64::STRXui:		case AArch64::STRXui:
case AArch64::STRBui:		case AArch64::STRBui:
case AArch64::STRHui:		case AArch64::STRHui:
case AArch64::STRSui:		case AArch64::STRSui:
case AArch64::STRDui:		case AArch64::STRDui:
case AArch64::STRQui:		case AArch64::STRQui:
		case AArch64::LDR_PXI:
		case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&		if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {		MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();		FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();		return MI.getOperand(0).getReg();
}		}
break;		break;
}		}
return 0;		return 0;
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STPWi:		case AArch64::STPWi:
case AArch64::STPSi:		case AArch64::STPSi:
case AArch64::LDNPWi:		case AArch64::LDNPWi:
case AArch64::LDNPSi:		case AArch64::LDNPSi:
case AArch64::STNPWi:		case AArch64::STNPWi:
case AArch64::STNPSi:		case AArch64::STNPSi:
case AArch64::LDG:		case AArch64::LDG:
case AArch64::STGPi:		case AArch64::STGPi:
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
return 3;		return 3;
case AArch64::ADDG:		case AArch64::ADDG:
case AArch64::STGOffset:		case AArch64::STGOffset:
		case AArch64::LDR_PXI:
		case AArch64::STR_PXI:
return 2;		return 2;
}		}
}		}

bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {		bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {		switch (MI.getOpcode()) {
default:		default:
return false;		return false;
▲ Show 20 Lines • Show All 229 Lines • ▼ Show 20 Lines	AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);		MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");		assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
return OfsOp;		return OfsOp;
}		}

bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,		bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
unsigned &Width, int64_t &MinOffset,		unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) {		int64_t &MaxOffset) {
		const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {		switch (Opcode) {
// Not a memory operation or something we want to handle.		// Not a memory operation or something we want to handle.
default:		default:
Scale = Width = 0;		Scale = Width = 0;
MinOffset = MaxOffset = 0;		MinOffset = MaxOffset = 0;
return false;		return false;
case AArch64::STRWpost:		case AArch64::STRWpost:
case AArch64::LDRWpost:		case AArch64::LDRWpost:
▲ Show 20 Lines • Show All 147 Lines • ▼ Show 20 Lines	bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDR_PXI:		case AArch64::LDR_PXI:
case AArch64::STR_PXI:		case AArch64::STR_PXI:
Scale = Width = 2;		Scale = Width = 2;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
case AArch64::LDR_ZXI:		case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:		case AArch64::STR_ZXI:
Scale = Width = 16;		Scale = 16;
		Width = SVEMaxBytesPerVector;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
		// A full vectors worth of data
		// Width = mbytes * elements
		Scale = 16;
		efriedmaUnsubmitted Done Reply Inline Actions This seems sort of confusing. "Scale" here is implicitly multiplied by vl, and there isn't any way for the caller to tell except by checking the opcode. efriedma: This seems sort of confusing. "Scale" here is implicitly multiplied by vl, and there isn't…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions I'm not sure if this is an actual issue in practice though. Are you suggesting to make Scale a `TypeSize` instead of an `unsigned`? sdesmalen: I'm not sure if this is an actual issue in practice though. Are you suggesting to make Scale a…
		efriedmaUnsubmitted Done Reply Inline Actions Yes, that would force the callers to explicitly handle scalable types. It looks like some of them don't. efriedma: Yes, that would force the callers to explicitly handle scalable types. It looks like some of…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Given that this change propagates through the rest of the code-base, I will do this in a separate patch. sdesmalen: Given that this change propagates through the rest of the code-base, I will do this in a…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions I've implemented this change in D72758. sdesmalen: I've implemented this change in D72758.
		Width = SVEMaxBytesPerVector;
		MinOffset = -8;
		MaxOffset = 7;
		break;
case AArch64::ST2GOffset:		case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:		case AArch64::STZ2GOffset:
Scale = 16;		Scale = 16;
Width = 32;		Width = 32;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
case AArch64::STGPi:		case AArch64::STGPi:
▲ Show 20 Lines • Show All 1,155 Lines • ▼ Show 20 Lines
}		}

static bool isSVEScaledImmInstruction(unsigned Opcode) {		static bool isSVEScaledImmInstruction(unsigned Opcode) {
switch (Opcode) {		switch (Opcode) {
case AArch64::LDR_ZXI:		case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:		case AArch64::STR_ZXI:
case AArch64::LDR_PXI:		case AArch64::LDR_PXI:
case AArch64::STR_PXI:		case AArch64::STR_PXI:
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,		int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,		StackOffset &SOffset,
▲ Show 20 Lines • Show All 3,035 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

	Show First 20 Lines • Show All 1,148 Lines • ▼ Show 20 Lines
	// 8-element contiguous stores			// 8-element contiguous stores
	defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;			defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;
	defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;			defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
	defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;			defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;

	// 16-element contiguous stores			// 16-element contiguous stores
	defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;			defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;

				multiclass spill_fill_predicate<ValueType Ty, Instruction Load, Instruction Store> {
				// reg + imm (frame-index)
				def : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
				(Load GPR64sp:$base, simm9:$offset)>;
				def : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
				(Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
				}

				let Predicates = [IsLE] in {
				efriedmaUnsubmitted Done Reply Inline Actions IsLE? Are we supposed to do something different on big-endian targets? efriedma: IsLE? Are we supposed to do something different on big-endian targets?
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions No, that was a misunderstanding on my part. I've removed this now. sdesmalen: No, that was a misunderstanding on my part. I've removed this now.
				defm Pat_SpillFill_P16 : spill_fill_predicate<nxv16i1, LDR_PXI, STR_PXI>;
				defm Pat_SpillFill_P8 : spill_fill_predicate<nxv8i1, LDR_PXI, STR_PXI>;
				defm Pat_SpillFill_P4 : spill_fill_predicate<nxv4i1, LDR_PXI, STR_PXI>;
				defm Pat_SpillFill_P2 : spill_fill_predicate<nxv2i1, LDR_PXI, STR_PXI>;
				}

				multiclass unpred_store<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
				// reg + imm (frame-index)
				def _reg_imm : Pat<(store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
				(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
				}

				defm Pat_ST1B : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
				efriedmaUnsubmitted Done Reply Inline Actions Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Should we prefer to use ldr/str where legal, to take advantage of the larger immediate offset? efriedma: Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Should we…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Our experience is that vectorized loops have most predicates CSEd anyway. For a loop that operates on two lanes, often a predicate is already available and there is no need to introduce an extra `ptrue_b`. If a loop using floats is vectorized with VF=2, we don't want operations on `<vscale x 2 x float>` to use `ptrue.b` because that would enable operations on all (vscale x) 4 lanes, which may not be valid. Should we prefer to use ldr/str where legal, to take advantage of the larger immediate offset? That would not be endian safe, hence the preference to use ST1 (note that the order is dictated by the AAPCS for when passing the vectors by reference). This case of saving/restoring to/from the stack like this is pretty rare. Normal spills and fills will indeed use the STR/LDR instructions. And normal load/store vector instructions that are not storing to a local will likely use other addressing modes like reg+reg. sdesmalen: > Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Our…
				efriedmaUnsubmitted Done Reply Inline Actions Okay, that makes sense. For the CSE thing, we could maybe add an optimization pass after isel if it's necessary. efriedma: Okay, that makes sense. For the CSE thing, we could maybe add an optimization pass after isel…
				defm Pat_ST1H : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
				defm Pat_ST1W : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
				defm Pat_ST1D : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
				defm Pat_ST1H_float16: unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
				defm Pat_ST1W_float : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
				defm Pat_ST1D_double : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;

				multiclass unpred_load<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
				// reg + imm (frame-index)
				def _reg_imm : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
				(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
				}

				defm Pat_LD1B : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
				defm Pat_LD1H : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
				defm Pat_LD1W : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
				defm Pat_LD1D : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
				defm Pat_LD1H_float16: unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
				defm Pat_LD1W_float : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
				defm Pat_LD1D_double : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
	}			}

	let Predicates = [HasSVE2] in {			let Predicates = [HasSVE2] in {
	// SVE2 integer multiply-add (indexed)			// SVE2 integer multiply-add (indexed)
				efriedmaUnsubmitted Not Done Reply Inline Actions nxv2i1 has the same memory layout as nxv16i1? I guess that makes sense given the available instructions. We might need to modify the datalayout to make that work properly; I think, without any explicit guidance from the layout string, it will assume a nxv2i1 load reads "vscale" bytes, not "vscale * 2" bytes. Not something to change in this patch, of course. efriedma: nxv2i1 has the same memory layout as nxv16i1? I guess that makes sense given the available…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions DataLayout assumes that each boolean has a memory size of `i8` as each predicate needs to be individually addressable, which leads to `storesize(<vscale x 2 x i1>) == storesize(<vscale x 2 x i8>)`. This also means that alloca's of predicates may allocate too much stack space depending on the number of elements. The generated code is correct because all the offsets scale accordingly; you can see this for example in `spill_nxv16i1` and `spill_nxv2i1`. The former (nxv16i1) allocates the sizeof two nxv16i8 vectors and loads the second predicate from offset `8 [* sizeof(predicate)]`, where the latter allocates the sizeof one nxv16i8 vector, and loads the second predicate from offset `2 [* sizeof(predicate)]`. (`sizeof(predicate) = (vscale * 2 bytes)`) This is different from spills introduced by e.g. the register allocator, where LLVM allocates space for the size of an (otherwise opaque) predicate register, set to 2 bytes. sdesmalen: DataLayout assumes that each boolean has a memory size of `i8` as each predicate needs to be…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions I should probably also point out that there is no other interface for users to read/write these predicates other than through `svbool_t`, which is an opaque type, so I don't think there is any need to expand the store of `nxv16i1` to a store of `nxv16i8`. sdesmalen: I should probably also point out that there is no other interface for users to read/write these…
				efriedmaUnsubmitted Not Done Reply Inline Actions DataLayout assumes that each boolean has a memory size of i8 as each predicate needs to be individually addressable The whole area is still messy, unfortunately. Like I stated before, the "store size" for vectors assumes the bits are tightly packed. For non-scalable vectors, SelectionDAG legalization assumes the bits are tightly packed. (I think we fixed all the legalization routines to be consistent with this.) And for AVX-512, loads and stores of `<16 x i1>` etc. are lowered to bit-packed operations (kmovw). I just did some quick tests, though, and unfortunately, it looks like the alignment (and therefore the allocation size) is messed up. The alignment of vectors is currently based on the alignment of the element type, not the size of the vector, so it's much larger than the store size for `<N x i1>`. Unless the store size is exactly 64 or 128 bits wide, in which case the alignment is 64/128 bits respectively. Probably someone needs to spend more time in this area at some point. efriedma: > DataLayout assumes that each boolean has a memory size of i8 as each predicate needs to be…
	defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;			defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
	defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;			defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;

	// SVE2 saturating multiply-add high (indexed)			// SVE2 saturating multiply-add high (indexed)
	defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;			defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
	defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;			defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;

	// SVE2 saturating multiply-add high (vectors, unpredicated)			// SVE2 saturating multiply-add high (vectors, unpredicated)
	▲ Show 20 Lines • Show All 397 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

	Show First 20 Lines • Show All 646 Lines • ▼ Show 20 Lines
	// The number of bits in a SVE register is architecturally defined			// The number of bits in a SVE register is architecturally defined
	// to be a multiple of this value. If <M x t> has this number of bits,			// to be a multiple of this value. If <M x t> has this number of bits,
	// a <n x M x t> vector can be stored in a SVE register without any			// a <n x M x t> vector can be stored in a SVE register without any
	// redundant bits. If <M x t> has this number of bits divided by P,			// redundant bits. If <M x t> has this number of bits divided by P,
	// a <n x M x t> vector is stored in a SVE register by placing index i			// a <n x M x t> vector is stored in a SVE register by placing index i
	// in index iP of a <n x (MP) x t> vector. The other elements of the			// in index iP of a <n x (MP) x t> vector. The other elements of the
	// <n x (M*P) x t> vector (such as index 1) are undefined.			// <n x (M*P) x t> vector (such as index 1) are undefined.
	static constexpr unsigned SVEBitsPerBlock = 128;			static constexpr unsigned SVEBitsPerBlock = 128;
				static constexpr unsigned SVEMaxBitsPerVector = 2048;
	} // end namespace AArch64			} // end namespace AArch64

	} // end namespace llvm			} // end namespace llvm

	#endif			#endif

llvm/test/CodeGen/AArch64/spillfill-sve.ll

This file was added.

				; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s | FileCheck %s

				; This file checks that unpredicated load/store instructions to locals
				; use the right instructions and offsets.

				; Data fills

				define void @fill_nxv16i8() {
				; CHECK-LABEL: fill_nxv16i8
				; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp]
				; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 16 x i8>
				%local1 = alloca <vscale x 16 x i8>
				load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local0
				load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local1
				ret void
				}

				define void @fill_nxv8i16() {
				; CHECK-LABEL: fill_nxv8i16
				; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
				; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 8 x i16>
				%local1 = alloca <vscale x 8 x i16>
				load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local0
				load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local1
				ret void
				}

				define void @fill_nxv4i32() {
				; CHECK-LABEL: fill_nxv4i32
				; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
				; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 4 x i32>
				%local1 = alloca <vscale x 4 x i32>
				load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local0
				load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local1
				ret void
				}

				define void @fill_nxv2i64() {
				; CHECK-LABEL: fill_nxv2i64
				; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
				; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 2 x i64>
				%local1 = alloca <vscale x 2 x i64>
				load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local0
				load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local1
				ret void
				}


				; Data spills

				define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
				; CHECK-LABEL: spill_nxv16i8
				; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp]
				; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 16 x i8>
				%local1 = alloca <vscale x 16 x i8>
				store volatile <vscale x 16 x i8> %v0, <vscale x 16 x i8>* %local0
				store volatile <vscale x 16 x i8> %v1, <vscale x 16 x i8>* %local1
				ret void
				}

				define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
				; CHECK-LABEL: spill_nxv8i16
				; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
				; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 8 x i16>
				%local1 = alloca <vscale x 8 x i16>
				store volatile <vscale x 8 x i16> %v0, <vscale x 8 x i16>* %local0
				store volatile <vscale x 8 x i16> %v1, <vscale x 8 x i16>* %local1
				ret void
				}

				define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
				; CHECK-LABEL: spill_nxv4i32
				; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
				; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 4 x i32>
				%local1 = alloca <vscale x 4 x i32>
				store volatile <vscale x 4 x i32> %v0, <vscale x 4 x i32>* %local0
				store volatile <vscale x 4 x i32> %v1, <vscale x 4 x i32>* %local1
				ret void
				}

				define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
				; CHECK-LABEL: spill_nxv2i64
				; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]
				; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 2 x i64>
				%local1 = alloca <vscale x 2 x i64>
				store volatile <vscale x 2 x i64> %v0, <vscale x 2 x i64>* %local0
				store volatile <vscale x 2 x i64> %v1, <vscale x 2 x i64>* %local1
				ret void
				}

				; Predicate fills

				define void @fill_nxv16i1() {
				; CHECK-LABEL: fill_nxv16i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #8, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp]
				%local0 = alloca <vscale x 16 x i1>
				%local1 = alloca <vscale x 16 x i1>
				load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local0
				load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local1
				ret void
				}

				define void @fill_nxv8i1() {
				; CHECK-LABEL: fill_nxv8i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp]
				%local0 = alloca <vscale x 8 x i1>
				%local1 = alloca <vscale x 8 x i1>
				load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local0
				load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local1
				ret void
				}

				define void @fill_nxv4i1() {
				; CHECK-LABEL: fill_nxv4i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
				%local0 = alloca <vscale x 4 x i1>
				%local1 = alloca <vscale x 4 x i1>
				load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local0
				load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local1
				ret void
				}

				define void @fill_nxv2i1() {
				; CHECK-LABEL: fill_nxv2i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #7, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
				%local0 = alloca <vscale x 2 x i1>
				%local1 = alloca <vscale x 2 x i1>
				load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local0
				load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local1
				ret void
				}

				; Predicate spills

				define void @spill_nxv16i1(<vscale x 16 x i1> %v0, <vscale x 16 x i1> %v1) {
				; CHECK-LABEL: spill_nxv16i1
				; CHECK-DAG: str p{{[01]}}, [sp, #8, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp]
				%local0 = alloca <vscale x 16 x i1>
				%local1 = alloca <vscale x 16 x i1>
				store volatile <vscale x 16 x i1> %v0, <vscale x 16 x i1>* %local0
				store volatile <vscale x 16 x i1> %v1, <vscale x 16 x i1>* %local1
				ret void
				}

				define void @spill_nxv8i1(<vscale x 8 x i1> %v0, <vscale x 8 x i1> %v1) {
				; CHECK-LABEL: spill_nxv8i1
				; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp]
				%local0 = alloca <vscale x 8 x i1>
				%local1 = alloca <vscale x 8 x i1>
				store volatile <vscale x 8 x i1> %v0, <vscale x 8 x i1>* %local0
				store volatile <vscale x 8 x i1> %v1, <vscale x 8 x i1>* %local1
				ret void
				}

				define void @spill_nxv4i1(<vscale x 4 x i1> %v0, <vscale x 4 x i1> %v1) {
				; CHECK-LABEL: spill_nxv4i1
				; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
				%local0 = alloca <vscale x 4 x i1>
				%local1 = alloca <vscale x 4 x i1>
				store volatile <vscale x 4 x i1> %v0, <vscale x 4 x i1>* %local0
				store volatile <vscale x 4 x i1> %v1, <vscale x 4 x i1>* %local1
				ret void
				}

				define void @spill_nxv2i1(<vscale x 2 x i1> %v0, <vscale x 2 x i1> %v1) {
				; CHECK-LABEL: spill_nxv2i1
				; CHECK-DAG: str p{{[01]}}, [sp, #7, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
				%local0 = alloca <vscale x 2 x i1>
				%local1 = alloca <vscale x 2 x i1>
				store volatile <vscale x 2 x i1> %v0, <vscale x 2 x i1>* %local0
				store volatile <vscale x 2 x i1> %v1, <vscale x 2 x i1>* %local1
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 232884

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/Analysis/Loads.cpp

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/spillfill-sve.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 232884

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/Analysis/Loads.cpp

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/spillfill-sve.ll

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices.
ClosedPublic