Diff 239573

llvm/include/llvm/Analysis/MemoryLocation.h

Show All 13 Lines

#ifndef LLVM_ANALYSIS_MEMORYLOCATION_H		#ifndef LLVM_ANALYSIS_MEMORYLOCATION_H
#define LLVM_ANALYSIS_MEMORYLOCATION_H		#define LLVM_ANALYSIS_MEMORYLOCATION_H

#include "llvm/ADT/DenseMapInfo.h"		#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Optional.h"		#include "llvm/ADT/Optional.h"
#include "llvm/IR/Instructions.h"		#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"		#include "llvm/IR/Metadata.h"
		#include "llvm/Support/TypeSize.h"

namespace llvm {		namespace llvm {

class LoadInst;		class LoadInst;
class StoreInst;		class StoreInst;
class MemTransferInst;		class MemTransferInst;
class MemIntrinsic;		class MemIntrinsic;
class AtomicMemTransferInst;		class AtomicMemTransferInst;
▲ Show 20 Lines • Show All 205 Lines • ▼ Show 20 Lines	public:
/// Return a location representing a particular argument of a call.		/// Return a location representing a particular argument of a call.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,		static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
const TargetLibraryInfo *TLI);		const TargetLibraryInfo *TLI);
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,		static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {		const TargetLibraryInfo &TLI) {
return getForArgument(Call, ArgIdx, &TLI);		return getForArgument(Call, ArgIdx, &TLI);
}		}

		// Return the exact size if the exact size is known at compile time,
		// otherwise return MemoryLocation::UnknownSize.
		static uint64_t getSizeOrUnknown(const TypeSize &T) {
		return T.isScalable() ? UnknownSize : T.getFixedSize();
		}

explicit MemoryLocation(const Value *Ptr = nullptr,		explicit MemoryLocation(const Value *Ptr = nullptr,
LocationSize Size = LocationSize::unknown(),		LocationSize Size = LocationSize::unknown(),
const AAMDNodes &AATags = AAMDNodes())		const AAMDNodes &AATags = AAMDNodes())
: Ptr(Ptr), Size(Size), AATags(AATags) {}		: Ptr(Ptr), Size(Size), AATags(AATags) {}

MemoryLocation getWithNewPtr(const Value *NewPtr) const {		MemoryLocation getWithNewPtr(const Value *NewPtr) const {
MemoryLocation Copy(*this);		MemoryLocation Copy(*this);
Copy.Ptr = NewPtr;		Copy.Ptr = NewPtr;
▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

llvm/lib/Analysis/Loads.cpp

Show First 20 Lines • Show All 134 Lines • ▼ Show 20 Lines	return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT,
Visited);		Visited);
}		}

bool llvm::isDereferenceableAndAlignedPointer(const Value V, Type Ty,		bool llvm::isDereferenceableAndAlignedPointer(const Value V, Type Ty,
MaybeAlign MA,		MaybeAlign MA,
const DataLayout &DL,		const DataLayout &DL,
const Instruction *CtxI,		const Instruction *CtxI,
const DominatorTree *DT) {		const DominatorTree *DT) {
if (!Ty->isSized())		// For unsized types or scalable vectors we don't know exactly how many bytes
		// are dereferenced, so bail out.
		efriedmaUnsubmitted Done Reply Inline Actions "how many bytes are dereferenced". efriedma: "how many bytes are dereferenced".
		if (!Ty->isSized() \|\| (Ty->isVectorTy() && Ty->getVectorIsScalable()))
return false;		return false;

// When dereferenceability information is provided by a dereferenceable		// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can		// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that		// determine the exact offset to the attributed variable, we can use that
// information here.		// information here.

// Require ABI alignment for loads without alignment specification		// Require ABI alignment for loads without alignment specification
▲ Show 20 Lines • Show All 329 Lines • Show Last 20 Lines

llvm/lib/CodeGen/CodeGenPrepare.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 6,803 Lines • ▼ Show 20 Lines
	/// Although we already have similar splitting in DAG Combine, we duplicate			/// Although we already have similar splitting in DAG Combine, we duplicate
	/// it in CodeGenPrepare to catch the case in which pattern is across			/// it in CodeGenPrepare to catch the case in which pattern is across
	/// multiple BBs. The logic in DAG Combine is kept to catch case generated			/// multiple BBs. The logic in DAG Combine is kept to catch case generated
	/// during code expansion.			/// during code expansion.
	static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,			static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
	const TargetLowering &TLI) {			const TargetLowering &TLI) {
	// Handle simple but common cases only.			// Handle simple but common cases only.
	Type *StoreType = SI.getValueOperand()->getType();			Type *StoreType = SI.getValueOperand()->getType();

				// The code below assumes shifting a value by <number of bits>,
				// whereas scalable vectors would have to be shifted by
				// <2log(vscale) + number of bits> in order to store the
				// low/high parts. Bailing out for now.
				if (StoreType->isVectorTy() && StoreType->getVectorIsScalable())
				return false;

	if (!DL.typeSizeEqualsStoreSize(StoreType) \|\|			if (!DL.typeSizeEqualsStoreSize(StoreType) \|\|
	DL.getTypeSizeInBits(StoreType) == 0)			DL.getTypeSizeInBits(StoreType) == 0)
	return false;			return false;

	unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;			unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
	Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);			Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
	if (!DL.typeSizeEqualsStoreSize(SplitStoreType))			if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
	return false;			return false;
	▲ Show 20 Lines • Show All 689 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 15,732 Lines • ▼ Show 20 Lines	for (unsigned i = 0; i < NumStores; ++i)
}		}
return true;		return true;
}		}

bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {		bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None \|\| !EnableStoreMerging)		if (OptLevel == CodeGenOpt::None \|\| !EnableStoreMerging)
return false;		return false;

		// TODO: Extend this function to merge stores of scalable vectors.
		// (e.g. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
		efriedmaUnsubmitted Not Done Reply Inline Actions This comment doesn't seem quite right. We could theoretically merge two stores if they're both scalable. For example, two `<vscale x 8 x i8>` stores can be merged to one `<vscale x 16 x i8>` store; we know `<vscale x 16 x i8>` is exactly twice as large as `<vscale x 8 x i8>`. You'd need extra logic for that, though, so I'm not suggesting changing the code. efriedma: This comment doesn't seem quite right. We could theoretically merge two stores if they're both…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions You're right, I will change the comment! sdesmalen: You're right, I will change the comment!
		// store since we know <vscale x 16 x i8> is exactly twice as large as
		// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
EVT MemVT = St->getMemoryVT();		EVT MemVT = St->getMemoryVT();
		if (MemVT.isScalableVector())
		return false;

int64_t ElementSizeBytes = MemVT.getStoreSize();		int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;		unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)		if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;		return false;

bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(		bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);		Attribute::NoImplicitFloat);
▲ Show 20 Lines • Show All 5,087 Lines • ▼ Show 20 Lines	auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {		if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
int64_t Offset = 0;		int64_t Offset = 0;
if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))		if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
Offset = (LSN->getAddressingMode() == ISD::PRE_INC)		Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
? C->getSExtValue()		? C->getSExtValue()
: (LSN->getAddressingMode() == ISD::PRE_DEC)		: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()		? -1 * C->getSExtValue()
: 0;		: 0;
		uint64_t Size =
		MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),		return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /base offset/,		Offset /base offset/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),		Optional<int64_t>(Size),
LSN->getMemOperand()};		LSN->getMemOperand()};
}		}
if (const auto *LN = cast<LifetimeSDNode>(N))		if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /isVolatile/, /isAtomic/ false, LN->getOperand(1),		return {false /isVolatile/, /isAtomic/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,		(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())		(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),		: Optional<int64_t>(),
(MachineMemOperand *)nullptr};		(MachineMemOperand *)nullptr};
▲ Show 20 Lines • Show All 263 Lines • ▼ Show 20 Lines	bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// We must have a base and an offset.		// We must have a base and an offset.
if (!BasePtr.getBase().getNode())		if (!BasePtr.getBase().getNode())
return false;		return false;

// Do not handle stores to undef base pointers.		// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())		if (BasePtr.getBase().isUndef())
return false;		return false;

		// BaseIndexOffset assumes that offsets are fixed-size, which
		// is not valid for scalable vectors where the offsets are
		// scaled by `vscale`, so bail out early.
		if (St->getMemoryVT().isScalableVector())
		return false;

// Add ST's interval.		// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);		Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {		while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.		// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())		if (!SDValue(Chain, 0)->hasOneUse())
break;		break;
// TODO: Relax for unordered atomics (see D66309)		// TODO: Relax for unordered atomics (see D66309)
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,818 Lines • ▼ Show 20 Lines	SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,

MMOFlags \|= MachineMemOperand::MOLoad;		MMOFlags \|= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);		assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify		// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.		// clients.
if (PtrInfo.V.isNull())		if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);		PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

		uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();		MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(		MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);		PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);		return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}		}

SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,		SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,		EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,		SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {		MachineMemOperand *MMO) {
if (VT == MemVT) {		if (VT == MemVT) {
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines	SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,

MMOFlags \|= MachineMemOperand::MOStore;		MMOFlags \|= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);		assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

if (PtrInfo.V.isNull())		if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);		PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

MachineFunction &MF = getMachineFunction();		MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(		uint64_t Size =
PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);		MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
		MachineMemOperand *MMO =
		efriedmaUnsubmitted Done Reply Inline Actions Should we have a helper for this pattern? efriedma: Should we have a helper for this pattern?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Yes, that would be useful. I've added `MemoryLocation::getSizeOrUnknown(const TypeSize &)` sdesmalen: Yes, that would be useful. I've added `MemoryLocation::getSizeOrUnknown(const TypeSize &)`
		MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);		return getStore(Chain, dl, Val, Ptr, MMO);
}		}

SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,		SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {		SDValue Ptr, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other &&		assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");		"Invalid chain type");
EVT VT = Val.getValueType();		EVT VT = Val.getValueType();
▲ Show 20 Lines • Show All 2,837 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Show First 20 Lines • Show All 214 Lines • ▼ Show 20 Lines	public:

void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,		void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
unsigned SubRegIdx);		unsigned SubRegIdx);
void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,		void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
unsigned SubRegIdx);		unsigned SubRegIdx);
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

		bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);

void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);		void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

bool tryBitfieldExtractOp(SDNode *N);		bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);		bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);		bool tryBitfieldInsertOp(SDNode *N);
▲ Show 20 Lines • Show All 1,138 Lines • ▼ Show 20 Lines	void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,

// Transfer memoperands.		// Transfer memoperands.
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();		MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});		CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

ReplaceNode(N, St);		ReplaceNode(N, St);
}		}

		bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
		SDValue &OffImm) {
		SDLoc dl(N);
		const DataLayout &DL = CurDAG->getDataLayout();
		const TargetLowering *TLI = getTargetLowering();

		// Try to match it for the frame address
		if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
		efriedmaUnsubmitted Done Reply Inline Actions This is sort of weird for a method named "SelectAddrModeFrameIndexSVE"; should it not just fail? efriedma: This is sort of weird for a method named "SelectAddrModeFrameIndexSVE"; should it not just fail?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Agreed, that should not have been there. Fixed. sdesmalen: Agreed, that should not have been there. Fixed.
		efriedmaUnsubmitted Done Reply Inline Actions I'm not sure how you're proving that "N" is a FrameIndexSDNode here? efriedma: I'm not sure how you're proving that "N" is a FrameIndexSDNode here?
		int FI = FINode->getIndex();
		Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
		OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
		return true;
		}

		return false;
		}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,		void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {		unsigned Opc) {
SDLoc dl(N);		SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);		EVT VT = N->getOperand(2)->getValueType(0);
const EVT ResTys[] = {MVT::i64, // Type of the write back register		const EVT ResTys[] = {MVT::i64, // Type of the write back register
MVT::Other}; // Type for the Chain		MVT::Other}; // Type for the Chain

// Form a REG_SEQUENCE to force register allocation.		// Form a REG_SEQUENCE to force register allocation.
▲ Show 20 Lines • Show All 2,998 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,452 Lines • ▼ Show 20 Lines	bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
// No global is ever allowed as a base.		// No global is ever allowed as a base.
if (AM.BaseGV)		if (AM.BaseGV)
return false;		return false;

// No reg+reg+imm addressing.		// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)		if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;		return false;

		// FIXME: Update this method to support scalable addressing modes.
		if (Ty->isVectorTy() && Ty->getVectorIsScalable())
		return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;

// check reg + imm case:		// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12		// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;		uint64_t NumBytes = 0;
if (Ty->isSized()) {		if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);		uint64_t NumBits = DL.getTypeSizeInBits(Ty);
		efriedmaUnsubmitted Done Reply Inline Actions Is this necessary? efriedma: Is this necessary?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions No, good catch! sdesmalen: No, good catch!
NumBytes = NumBits / 8;		NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))		if (!isPowerOf2_64(NumBits))
NumBytes = 0;		NumBytes = 0;
}		}

if (!AM.Scale) {		if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;		int64_t Offset = AM.BaseOffs;

▲ Show 20 Lines • Show All 3,986 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 343 Lines • ▼ Show 20 Lines	def simm7s8 : Operand<i32> {
let PrintMethod = "printImmScale<8>";		let PrintMethod = "printImmScale<8>";
}		}

def simm7s16 : Operand<i32> {		def simm7s16 : Operand<i32> {
let ParserMatchClass = SImm7s16Operand;		let ParserMatchClass = SImm7s16Operand;
let PrintMethod = "printImmScale<16>";		let PrintMethod = "printImmScale<16>";
}		}

		def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;

def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;		def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;		def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;		def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;		def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;		def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;

def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;		def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;		def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
▲ Show 20 Lines • Show All 10,522 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Show First 20 Lines • Show All 1,685 Lines • ▼ Show 20 Lines	default:
break;		break;
case AArch64::STRWui:		case AArch64::STRWui:
case AArch64::STRXui:		case AArch64::STRXui:
case AArch64::STRBui:		case AArch64::STRBui:
case AArch64::STRHui:		case AArch64::STRHui:
case AArch64::STRSui:		case AArch64::STRSui:
case AArch64::STRDui:		case AArch64::STRDui:
case AArch64::STRQui:		case AArch64::STRQui:
		case AArch64::LDR_PXI:
		case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&		if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {		MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();		FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();		return MI.getOperand(0).getReg();
}		}
break;		break;
}		}
return 0;		return 0;
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STPWi:		case AArch64::STPWi:
case AArch64::STPSi:		case AArch64::STPSi:
case AArch64::LDNPWi:		case AArch64::LDNPWi:
case AArch64::LDNPSi:		case AArch64::LDNPSi:
case AArch64::STNPWi:		case AArch64::STNPWi:
case AArch64::STNPSi:		case AArch64::STNPSi:
case AArch64::LDG:		case AArch64::LDG:
case AArch64::STGPi:		case AArch64::STGPi:
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
return 3;		return 3;
case AArch64::ADDG:		case AArch64::ADDG:
case AArch64::STGOffset:		case AArch64::STGOffset:
		case AArch64::LDR_PXI:
		case AArch64::STR_PXI:
return 2;		return 2;
}		}
}		}

bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {		bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {		switch (MI.getOpcode()) {
default:		default:
return false;		return false;
▲ Show 20 Lines • Show All 234 Lines • ▼ Show 20 Lines	AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);		MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");		assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
return OfsOp;		return OfsOp;
}		}

bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,		bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
unsigned &Width, int64_t &MinOffset,		unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) {		int64_t &MaxOffset) {
		const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {		switch (Opcode) {
// Not a memory operation or something we want to handle.		// Not a memory operation or something we want to handle.
default:		default:
Scale = Width = 0;		Scale = Width = 0;
MinOffset = MaxOffset = 0;		MinOffset = MaxOffset = 0;
return false;		return false;
case AArch64::STRWpost:		case AArch64::STRWpost:
case AArch64::LDRWpost:		case AArch64::LDRWpost:
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines	bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STGOffset:		case AArch64::STGOffset:
case AArch64::STZGOffset:		case AArch64::STZGOffset:
Scale = Width = 16;		Scale = Width = 16;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
case AArch64::LDR_PXI:		case AArch64::LDR_PXI:
case AArch64::STR_PXI:		case AArch64::STR_PXI:
Scale = Width = 2;		Scale = 2;
		Width = SVEMaxBytesPerVector / 8;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
case AArch64::LDR_ZXI:		case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:		case AArch64::STR_ZXI:
Scale = Width = 16;		Scale = 16;
		Width = SVEMaxBytesPerVector;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
		// A full vector's worth of data
		// Width = mbytes * elements
		Scale = 16;
		efriedmaUnsubmitted Done Reply Inline Actions This seems sort of confusing. "Scale" here is implicitly multiplied by vl, and there isn't any way for the caller to tell except by checking the opcode. efriedma: This seems sort of confusing. "Scale" here is implicitly multiplied by vl, and there isn't…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions I'm not sure if this is an actual issue in practice though. Are you suggesting to make Scale a `TypeSize` instead of an `unsigned`? sdesmalen: I'm not sure if this is an actual issue in practice though. Are you suggesting to make Scale a…
		efriedmaUnsubmitted Done Reply Inline Actions Yes, that would force the callers to explicitly handle scalable types. It looks like some of them don't. efriedma: Yes, that would force the callers to explicitly handle scalable types. It looks like some of…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Given that this change propagates through the rest of the code-base, I will do this in a separate patch. sdesmalen: Given that this change propagates through the rest of the code-base, I will do this in a…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions I've implemented this change in D72758. sdesmalen: I've implemented this change in D72758.
		Width = SVEMaxBytesPerVector;
		MinOffset = -8;
		MaxOffset = 7;
		break;
case AArch64::ST2GOffset:		case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:		case AArch64::STZ2GOffset:
Scale = 16;		Scale = 16;
Width = 32;		Width = 32;
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
break;		break;
case AArch64::STGPi:		case AArch64::STGPi:
▲ Show 20 Lines • Show All 1,187 Lines • ▼ Show 20 Lines
}		}

static bool isSVEScaledImmInstruction(unsigned Opcode) {		static bool isSVEScaledImmInstruction(unsigned Opcode) {
switch (Opcode) {		switch (Opcode) {
case AArch64::LDR_ZXI:		case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:		case AArch64::STR_ZXI:
case AArch64::LDR_PXI:		case AArch64::LDR_PXI:
case AArch64::STR_PXI:		case AArch64::STR_PXI:
		case AArch64::LD1B_IMM:
		case AArch64::LD1H_IMM:
		case AArch64::LD1W_IMM:
		case AArch64::LD1D_IMM:
		case AArch64::ST1B_IMM:
		case AArch64::ST1H_IMM:
		case AArch64::ST1W_IMM:
		case AArch64::ST1D_IMM:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,		int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,		StackOffset &SOffset,
▲ Show 20 Lines • Show All 3,190 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

	Show First 20 Lines • Show All 1,253 Lines • ▼ Show 20 Lines
	defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;			defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;
	defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRI>;			defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRI>;
	defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRI>;			defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRI>;
	defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRI>;			defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRI>;

	defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;			defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;
	defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>;			defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>;
	defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>;			defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>;
	defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>;			defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>;
				efriedmaUnsubmitted Done Reply Inline Actions IsLE? Are we supposed to do something different on big-endian targets? efriedma: IsLE? Are we supposed to do something different on big-endian targets?
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions No, that was a misunderstanding on my part. I've removed this now. sdesmalen: No, that was a misunderstanding on my part. I've removed this now.

				multiclass unpred_store<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
				def _fi : Pat<(store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
				(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
				}

				defm Pat_ST1B : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
				defm Pat_ST1H : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
				defm Pat_ST1W : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
				defm Pat_ST1D : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
				defm Pat_ST1H_float16: unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
				defm Pat_ST1W_float : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
				defm Pat_ST1D_double : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;
				efriedmaUnsubmitted Done Reply Inline Actions Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Should we prefer to use ldr/str where legal, to take advantage of the larger immediate offset? efriedma: Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Should we…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions "Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE?" Our experience is that vectorized loops have most predicates CSEd anyway. For a loop that operates on two lanes, often a predicate is already available and there is no need to introduce an extra `ptrue_b`. If a loop using floats is vectorized with VF=2, we don't want operations on `<vscale x 2 x float>` to use `ptrue.b` because that would enable operations on all (vscale x) 4 lanes, which may not be valid. "Should we prefer to use ldr/str where legal, to take advantage of the larger immediate offset?" That would not be endian safe, hence the preference to use ST1 (note that the order is dictated by the AAPCS when passing the vectors by reference). This case of saving/restoring to/from the stack like this is pretty rare. Normal spills and fills will indeed use the STR/LDR instructions. And normal load/store vector instructions that are not storing to a local will likely use other addressing modes like reg+reg. sdesmalen: > Should we always use PTRUE_B, even for non-byte element sizes, to encourage CSE? Our…
				efriedmaUnsubmitted Done Reply Inline Actions Okay, that makes sense. For the CSE thing, we could maybe add an optimization pass after isel if it's necessary. efriedma: Okay, that makes sense. For the CSE thing, we could maybe add an optimization pass after isel…

				multiclass unpred_load<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
				def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
				(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
				}

				defm Pat_LD1B : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
				defm Pat_LD1H : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
				defm Pat_LD1W : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
				defm Pat_LD1D : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
				defm Pat_LD1H_float16: unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
				defm Pat_LD1W_float : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
				defm Pat_LD1D_double : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;

				multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
				def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
				(Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
				}

				defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>;
				defm Pat_Store_P8 : unpred_store_predicate<nxv8i1, STR_PXI>;
				defm Pat_Store_P4 : unpred_store_predicate<nxv4i1, STR_PXI>;
				defm Pat_Store_P2 : unpred_store_predicate<nxv2i1, STR_PXI>;

				multiclass unpred_load_predicate<ValueType Ty, Instruction Load> {
				def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
				(Load GPR64sp:$base, simm9:$offset)>;
				}

				defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>;
				defm Pat_Load_P8 : unpred_load_predicate<nxv8i1, LDR_PXI>;
				defm Pat_Load_P4 : unpred_load_predicate<nxv4i1, LDR_PXI>;
				defm Pat_Load_P2 : unpred_load_predicate<nxv2i1, LDR_PXI>;
				efriedmaUnsubmitted Not Done Reply Inline Actions nxv2i1 has the same memory layout as nxv16i1? I guess that makes sense given the available instructions. We might need to modify the datalayout to make that work properly; I think, without any explicit guidance from the layout string, it will assume a nxv2i1 load reads "vscale" bytes, not "vscale * 2" bytes. Not something to change in this patch, of course. efriedma: nxv2i1 has the same memory layout as nxv16i1? I guess that makes sense given the available…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions DataLayout assumes that each boolean has a memory size of `i8` as each predicate needs to be individually addressable, which leads to `storesize(<vscale x 2 x i1>) == storesize(<vscale x 2 x i8>)`. This also means that allocas of predicates may allocate too much stack space depending on the number of elements. The generated code is correct because all the offsets scale accordingly; you can see this for example in `spill_nxv16i1` and `spill_nxv2i1`. The former (nxv16i1) allocates the size of two nxv16i8 vectors and loads the second predicate from offset `8 [* sizeof(predicate)]`, whereas the latter allocates the size of one nxv16i8 vector, and loads the second predicate from offset `2 [* sizeof(predicate)]`. (`sizeof(predicate) = (vscale * 2 bytes)`) This is different from spills introduced by e.g. the register allocator, where LLVM allocates space for the size of an (otherwise opaque) predicate register, set to 2 bytes. sdesmalen: DataLayout assumes that each boolean has a memory size of `i8` as each predicate needs to be…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions I should probably also point out that there is no other interface for users to read/write these predicates other than through `svbool_t`, which is an opaque type, so I don't think there is any need to expand the store of `nxv16i1` to a store of `nxv16i8`. sdesmalen: I should probably also point out that there is no other interface for users to read/write these…
				efriedmaUnsubmitted Not Done Reply Inline Actions "DataLayout assumes that each boolean has a memory size of i8 as each predicate needs to be individually addressable" The whole area is still messy, unfortunately. Like I stated before, the "store size" for vectors assumes the bits are tightly packed. For non-scalable vectors, SelectionDAG legalization assumes the bits are tightly packed. (I think we fixed all the legalization routines to be consistent with this.) And for AVX-512, loads and stores of `<16 x i1>` etc. are lowered to bit-packed operations (kmovw). I just did some quick tests, though, and unfortunately, it looks like the alignment (and therefore the allocation size) is messed up. The alignment of vectors is currently based on the alignment of the element type, not the size of the vector, so it's much larger than the store size for `<N x i1>`. Unless the store size is exactly 64 or 128 bits wide, in which case the alignment is 64/128 bits respectively. Probably someone needs to spend more time in this area at some point. efriedma: > DataLayout assumes that each boolean has a memory size of i8 as each predicate needs to be…

	multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {			multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
	// base			// base
	def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),			def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
	(I PPR:$gp, GPR64sp:$base, (i64 0))>;			(I PPR:$gp, GPR64sp:$base, (i64 0))>;
	}			}

	// 2-element contiguous non-faulting loads			// 2-element contiguous non-faulting loads
	defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i8>;			defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1, nxv2i1, nxv2i8>;
	▲ Show 20 Lines • Show All 433 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

	Show First 20 Lines • Show All 652 Lines • ▼ Show 20 Lines
	// The number of bits in a SVE register is architecturally defined			// The number of bits in a SVE register is architecturally defined
	// to be a multiple of this value. If <M x t> has this number of bits,			// to be a multiple of this value. If <M x t> has this number of bits,
	// a <n x M x t> vector can be stored in a SVE register without any			// a <n x M x t> vector can be stored in a SVE register without any
	// redundant bits. If <M x t> has this number of bits divided by P,			// redundant bits. If <M x t> has this number of bits divided by P,
	// a <n x M x t> vector is stored in a SVE register by placing index i			// a <n x M x t> vector is stored in a SVE register by placing index i
	// in index iP of a <n x (MP) x t> vector. The other elements of the			// in index iP of a <n x (MP) x t> vector. The other elements of the
	// <n x (M*P) x t> vector (such as index 1) are undefined.			// <n x (M*P) x t> vector (such as index 1) are undefined.
	static constexpr unsigned SVEBitsPerBlock = 128;			static constexpr unsigned SVEBitsPerBlock = 128;
				static constexpr unsigned SVEMaxBitsPerVector = 2048;
	const unsigned NeonBitsPerVector = 128;			const unsigned NeonBitsPerVector = 128;
	} // end namespace AArch64			} // end namespace AArch64
	} // end namespace llvm			} // end namespace llvm

	#endif			#endif

llvm/test/CodeGen/AArch64/spillfill-sve.ll

This file was added.

				; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s \| FileCheck %s

				; This file checks that unpredicated load/store instructions to locals
				; use the right instructions and offsets.

				; Data fills

				define void @fill_nxv16i8() {
				; CHECK-LABEL: fill_nxv16i8
				; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp]
				; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 16 x i8>
				%local1 = alloca <vscale x 16 x i8>
				load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local0
				load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local1
				ret void
				}

				define void @fill_nxv8i16() {
				; CHECK-LABEL: fill_nxv8i16
				; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
				; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 8 x i16>
				%local1 = alloca <vscale x 8 x i16>
				load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local0
				load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local1
				ret void
				}

				define void @fill_nxv4i32() {
				; CHECK-LABEL: fill_nxv4i32
				; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
				; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 4 x i32>
				%local1 = alloca <vscale x 4 x i32>
				load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local0
				load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local1
				ret void
				}

				define void @fill_nxv2i64() {
				; CHECK-LABEL: fill_nxv2i64
				; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
				; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
				%local0 = alloca <vscale x 2 x i64>
				%local1 = alloca <vscale x 2 x i64>
				load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local0
				load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local1
				ret void
				}


				; Data spills

				define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
				; CHECK-LABEL: spill_nxv16i8
				; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp]
				; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 16 x i8>
				%local1 = alloca <vscale x 16 x i8>
				store volatile <vscale x 16 x i8> %v0, <vscale x 16 x i8>* %local0
				store volatile <vscale x 16 x i8> %v1, <vscale x 16 x i8>* %local1
				ret void
				}

				define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
				; CHECK-LABEL: spill_nxv8i16
				; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
				; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 8 x i16>
				%local1 = alloca <vscale x 8 x i16>
				store volatile <vscale x 8 x i16> %v0, <vscale x 8 x i16>* %local0
				store volatile <vscale x 8 x i16> %v1, <vscale x 8 x i16>* %local1
				ret void
				}

				define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
				; CHECK-LABEL: spill_nxv4i32
				; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
				; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 4 x i32>
				%local1 = alloca <vscale x 4 x i32>
				store volatile <vscale x 4 x i32> %v0, <vscale x 4 x i32>* %local0
				store volatile <vscale x 4 x i32> %v1, <vscale x 4 x i32>* %local1
				ret void
				}

				define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
				; CHECK-LABEL: spill_nxv2i64
				; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]
				; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp, #1, mul vl]
				%local0 = alloca <vscale x 2 x i64>
				%local1 = alloca <vscale x 2 x i64>
				store volatile <vscale x 2 x i64> %v0, <vscale x 2 x i64>* %local0
				store volatile <vscale x 2 x i64> %v1, <vscale x 2 x i64>* %local1
				ret void
				}

				; Predicate fills

				define void @fill_nxv16i1() {
				; CHECK-LABEL: fill_nxv16i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #8, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp]
				%local0 = alloca <vscale x 16 x i1>
				%local1 = alloca <vscale x 16 x i1>
				load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local0
				load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local1
				ret void
				}

				define void @fill_nxv8i1() {
				; CHECK-LABEL: fill_nxv8i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp]
				%local0 = alloca <vscale x 8 x i1>
				%local1 = alloca <vscale x 8 x i1>
				load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local0
				load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local1
				ret void
				}

				define void @fill_nxv4i1() {
				; CHECK-LABEL: fill_nxv4i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
				%local0 = alloca <vscale x 4 x i1>
				%local1 = alloca <vscale x 4 x i1>
				load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local0
				load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local1
				ret void
				}

				define void @fill_nxv2i1() {
				; CHECK-LABEL: fill_nxv2i1
				; CHECK-DAG: ldr p{{[01]}}, [sp, #7, mul vl]
				; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
				%local0 = alloca <vscale x 2 x i1>
				%local1 = alloca <vscale x 2 x i1>
				load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local0
				load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local1
				ret void
				}

				; Predicate spills

				define void @spill_nxv16i1(<vscale x 16 x i1> %v0, <vscale x 16 x i1> %v1) {
				; CHECK-LABEL: spill_nxv16i1
				; CHECK-DAG: str p{{[01]}}, [sp, #8, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp]
				%local0 = alloca <vscale x 16 x i1>
				%local1 = alloca <vscale x 16 x i1>
				store volatile <vscale x 16 x i1> %v0, <vscale x 16 x i1>* %local0
				store volatile <vscale x 16 x i1> %v1, <vscale x 16 x i1>* %local1
				ret void
				}

				define void @spill_nxv8i1(<vscale x 8 x i1> %v0, <vscale x 8 x i1> %v1) {
				; CHECK-LABEL: spill_nxv8i1
				; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp]
				%local0 = alloca <vscale x 8 x i1>
				%local1 = alloca <vscale x 8 x i1>
				store volatile <vscale x 8 x i1> %v0, <vscale x 8 x i1>* %local0
				store volatile <vscale x 8 x i1> %v1, <vscale x 8 x i1>* %local1
				ret void
				}

				define void @spill_nxv4i1(<vscale x 4 x i1> %v0, <vscale x 4 x i1> %v1) {
				; CHECK-LABEL: spill_nxv4i1
				; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
				%local0 = alloca <vscale x 4 x i1>
				%local1 = alloca <vscale x 4 x i1>
				store volatile <vscale x 4 x i1> %v0, <vscale x 4 x i1>* %local0
				store volatile <vscale x 4 x i1> %v1, <vscale x 4 x i1>* %local1
				ret void
				}

				define void @spill_nxv2i1(<vscale x 2 x i1> %v0, <vscale x 2 x i1> %v1) {
				; CHECK-LABEL: spill_nxv2i1
				; CHECK-DAG: str p{{[01]}}, [sp, #7, mul vl]
				; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
				%local0 = alloca <vscale x 2 x i1>
				%local1 = alloca <vscale x 2 x i1>
				store volatile <vscale x 2 x i1> %v0, <vscale x 2 x i1>* %local0
				store volatile <vscale x 2 x i1> %v1, <vscale x 2 x i1>* %local1
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 239573

llvm/include/llvm/Analysis/MemoryLocation.h

llvm/lib/Analysis/Loads.cpp

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/spillfill-sve.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 239573

llvm/include/llvm/Analysis/MemoryLocation.h

llvm/lib/Analysis/Loads.cpp

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/spillfill-sve.ll

[AArch64][SVE] Add patterns for unpredicated load/store to frame-indices.
ClosedPublic