Diff 436308

llvm/lib/Target/AArch64/AArch64.td

Show First 20 Lines • Show All 209 Lines • ▼ Show 20 Lines	def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;		"UsePostRAScheduler", "true", "Schedule again after register allocation">;

def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",		def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
"IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;		"IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;

def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",		def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
"IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;		"IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;

		def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
		"IsStoreAddressAscend", "false",
		dmgreenUnsubmitted Done Reply Inline Actions Capitalize Schedule. Maybe reword as "Schedule vector stores by ascending address". dmgreen: Capitalize Schedule. Maybe reword as "Schedule vector stores by ascending address".
		AllenAuthorUnsubmitted Done Reply Inline Actions Thanks, apply your comment. Allen: Thanks, apply your comment.
		dmgreenUnsubmitted Not Done Reply Inline Actions A q store is a store of a vector. This subtarget feature doesn't alter the scalar stores like x/w/s. dmgreen: A q store is a store of a vector. This subtarget feature doesn't alter the scalar stores like…
		"Schedule scalar stores by ascending address">;

def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",		def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
"true", "STR of Q register with register offset is slow">;		"true", "STR of Q register with register offset is slow">;

def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<		def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",		"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;		"true", "Use alternative pattern for sextload convert to f32">;

def FeatureArithmeticBccFusion : SubtargetFeature<		def FeatureArithmeticBccFusion : SubtargetFeature<
▲ Show 20 Lines • Show All 1,054 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	public:
static bool isPreLd(const MachineInstr &MI);		static bool isPreLd(const MachineInstr &MI);

/// Returns whether the instruction is a pre-indexed store.		/// Returns whether the instruction is a pre-indexed store.
static bool isPreSt(const MachineInstr &MI);		static bool isPreSt(const MachineInstr &MI);

/// Returns whether the instruction is a pre-indexed load/store.		/// Returns whether the instruction is a pre-indexed load/store.
static bool isPreLdSt(const MachineInstr &MI);		static bool isPreLdSt(const MachineInstr &MI);

		/// Returns whether the instruction is a paired load/store.
		dmgreenUnsubmitted Done Reply Inline Actions Can you add a /// doc string dmgreen: Can you add a /// doc string
		static bool isPairedLdSt(const MachineInstr &MI);

		/// Returns the base register operand of a load/store.
		static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

		/// Returns the immediate offset operand of a load/store.
		static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

/// Returns whether the instruction is FP or NEON.		/// Returns whether the instruction is FP or NEON.
static bool isFpOrNEON(const MachineInstr &MI);		static bool isFpOrNEON(const MachineInstr &MI);

/// Returns whether the instruction is in Q form (128 bit operands)		/// Returns whether the instruction is in Q form (128 bit operands)
static bool isQForm(const MachineInstr &MI);		static bool isQForm(const MachineInstr &MI);

/// Returns the index for the immediate for a given instruction.		/// Returns the index for the immediate for a given instruction.
static unsigned getLoadStoreImmIdx(unsigned Opc);		static unsigned getLoadStoreImmIdx(unsigned Opc);
▲ Show 20 Lines • Show All 373 Lines • ▼ Show 20 Lines	case AArch64::PTRUE_D:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

/// Return opcode to be used for indirect calls.		/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);		unsigned getBLRCallOpcode(const MachineFunction &MF);

// struct TSFlags {		// struct TSFlags {
		dmgreenUnsubmitted Done Reply Inline Actions I think you can probably move these into AArch64InstrInfo, near to isPairedLdSt dmgreen: I think you can probably move these into AArch64InstrInfo, near to isPairedLdSt
		AllenAuthorUnsubmitted Done Reply Inline Actions Done, thanks! Allen: Done, thanks!
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits		#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits		#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits		#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits		#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
// }		// }

namespace AArch64 {		namespace AArch64 {

▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,146 Lines • ▼ Show 20 Lines	case AArch64::STRQpre:
return true;		return true;
}		}
}		}

bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {		bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
return isPreLd(MI) || isPreSt(MI);		return isPreLd(MI) || isPreSt(MI);
}		}

		/// Reports whether \p MI uses one of the paired load/store opcodes
		/// enumerated in the switch below.
		bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
		  const unsigned Opcode = MI.getOpcode();
		  switch (Opcode) {
		  // LDP* opcodes.
		  case AArch64::LDPSi:
		  case AArch64::LDPSWi:
		  case AArch64::LDPDi:
		  case AArch64::LDPQi:
		  case AArch64::LDPWi:
		  case AArch64::LDPXi:
		  // STP* opcodes.
		  case AArch64::STPSi:
		  case AArch64::STPDi:
		  case AArch64::STPQi:
		  case AArch64::STPWi:
		  case AArch64::STPXi:
		  // STGP opcode.
		  case AArch64::STGPi:
		    return true;
		  // Every other opcode is not treated as a paired load/store.
		  default:
		    return false;
		  }
		}

		/// Returns the base register operand of the load/store \p MI.
		///
		/// The operand is read from index 2 when \p MI is a paired or a
		/// pre-indexed load/store, and from index 1 for every other instruction.
		const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
		  const bool IsPairedOrPreIndexed =
		      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI);
		  const unsigned Idx = IsPairedOrPreIndexed ? 2 : 1;
		  return MI.getOperand(Idx);
		}

		/// Returns the immediate offset operand of the load/store \p MI.
		///
		/// The operand is read from index 3 when \p MI is a paired or a
		/// pre-indexed load/store, and from index 2 for every other instruction.
		const MachineOperand &
		AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
		  const bool IsPairedOrPreIndexed =
		      AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI);
		  const unsigned Idx = IsPairedOrPreIndexed ? 3 : 2;
		  return MI.getOperand(Idx);
		}

static const TargetRegisterClass *getRegClass(const MachineInstr &MI,		static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
Register Reg) {		Register Reg) {
if (MI.getParent() == nullptr)		if (MI.getParent() == nullptr)
return nullptr;		return nullptr;
const MachineFunction *MF = MI.getParent()->getParent();		const MachineFunction *MF = MI.getParent()->getParent();
return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;		return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
}		}

▲ Show 20 Lines • Show All 4,850 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Show First 20 Lines • Show All 550 Lines • ▼ Show 20 Lines	case AArch64::ST2GOffset:
return AArch64::ST2GPostIndex;		return AArch64::ST2GPostIndex;
case AArch64::STZ2GOffset:		case AArch64::STZ2GOffset:
return AArch64::STZ2GPostIndex;		return AArch64::STZ2GPostIndex;
case AArch64::STGPi:		case AArch64::STGPi:
return AArch64::STGPpost;		return AArch64::STGPpost;
}		}
}		}

/// Reports whether \p MI uses one of the paired load/store opcodes
/// enumerated in the switch below.
static bool isPairedLdSt(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  // LDP* opcodes.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  // STP* opcodes.
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  // STGP opcode.
  case AArch64::STGPi:
    return true;
  // Every other opcode is not treated as a paired load/store.
  default:
    return false;
  }
}

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {		static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

unsigned OpcA = FirstMI.getOpcode();		unsigned OpcA = FirstMI.getOpcode();
unsigned OpcB = MI.getOpcode();		unsigned OpcB = MI.getOpcode();

switch (OpcA) {		switch (OpcA) {
default:		default:
return false;		return false;
Show All 18 Lines	static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
case AArch64::LDRXpre:		case AArch64::LDRXpre:
return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);		return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
}		}
}		}

// Returns the scale and offset range of pre/post indexed variants of MI.		// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,		static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
int &MinOffset, int &MaxOffset) {		int &MinOffset, int &MaxOffset) {
bool IsPaired = isPairedLdSt(MI);		bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
bool IsTagStore = isTagStore(MI);		bool IsTagStore = isTagStore(MI);
// ST*G and all paired ldst have the same scale in pre/post-indexed variants		// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.		// as in the "unsigned offset" variant.
// All other pre/post indexed ldst instructions are unscaled.		// All other pre/post indexed ldst instructions are unscaled.
Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;		Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

if (IsPaired) {		if (IsPaired) {
MinOffset = -64;		MinOffset = -64;
MaxOffset = 63;		MaxOffset = 63;
} else {		} else {
MinOffset = -256;		MinOffset = -256;
MaxOffset = 255;		MaxOffset = 255;
}		}
}		}

static MachineOperand &getLdStRegOp(MachineInstr &MI,		static MachineOperand &getLdStRegOp(MachineInstr &MI,
unsigned PairedRegOp = 0) {		unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");		assert(PairedRegOp < 2 && "Unexpected register operand idx.");
bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);		bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
if (IsPreLdSt)		if (IsPreLdSt)
PairedRegOp += 1;		PairedRegOp += 1;
unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;		unsigned Idx =
return MI.getOperand(Idx);		AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
}

/// Returns the base register operand of the load/store \p MI.
///
/// The operand is read from index 2 when \p MI is a paired or a pre-indexed
/// load/store, and from index 1 for every other instruction.
static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
  const bool IsPairedOrPreIndexed =
      isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI);
  const unsigned Idx = IsPairedOrPreIndexed ? 2 : 1;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
return MI.getOperand(Idx);		return MI.getOperand(Idx);
}		}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,		static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
MachineInstr &StoreInst,		MachineInstr &StoreInst,
const AArch64InstrInfo *TII) {		const AArch64InstrInfo *TII) {
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");		assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = TII->getMemScale(LoadInst);		int LoadSize = TII->getMemScale(LoadInst);
int StoreSize = TII->getMemScale(StoreInst);		int StoreSize = TII->getMemScale(StoreInst);
int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)		int UnscaledStOffset =
? getLdStOffsetOp(StoreInst).getImm()		TII->hasUnscaledLdStOffset(StoreInst)
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;		? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)		: AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
? getLdStOffsetOp(LoadInst).getImm()		int UnscaledLdOffset =
: getLdStOffsetOp(LoadInst).getImm() * LoadSize;		TII->hasUnscaledLdStOffset(LoadInst)
		? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
		: AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&		return (UnscaledStOffset <= UnscaledLdOffset) &&
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));		(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}		}

static bool isPromotableZeroStoreInst(MachineInstr &MI) {		static bool isPromotableZeroStoreInst(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();		unsigned Opc = MI.getOpcode();
return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||		return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
isNarrowStore(Opc)) &&		isNarrowStore(Opc)) &&
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines	static bool isMergeableLdStUpdate(MachineInstr &MI) {
case AArch64::LDPWi:		case AArch64::LDPWi:
case AArch64::LDPXi:		case AArch64::LDPXi:
case AArch64::STPSi:		case AArch64::STPSi:
case AArch64::STPDi:		case AArch64::STPDi:
case AArch64::STPQi:		case AArch64::STPQi:
case AArch64::STPWi:		case AArch64::STPWi:
case AArch64::STPXi:		case AArch64::STPXi:
// Make sure this is a reg+imm (as opposed to an address reloc).		// Make sure this is a reg+imm (as opposed to an address reloc).
if (!getLdStOffsetOp(MI).isImm())		if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;		return false;

return true;		return true;
}		}
}		}

MachineBasicBlock::iterator		MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,		AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
Show All 17 Lines	AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,

bool MergeForward = Flags.getMergeForward();		bool MergeForward = Flags.getMergeForward();
// Insert our new paired instruction after whichever of the paired		// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.		// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;		MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
// Also based on MergeForward is from where we copy the base register operand		// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.		// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =		const MachineOperand &BaseRegOp =
MergeForward ? getLdStBaseOp(MergeMI) : getLdStBaseOp(I);		MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
		: AArch64InstrInfo::getLdStBaseOp(*I);

// Which register is Rt and which is Rt2 depends on the offset order.		// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI;		MachineInstr *RtMI;
if (getLdStOffsetOp(*I).getImm() ==		if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)		AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;		RtMI = &*MergeMI;
else		else
RtMI = &*I;		RtMI = &*I;

int OffsetImm = getLdStOffsetOp(*RtMI).getImm();		int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.		// Change the scaled offset from small to large type.
if (IsScaled) {		if (IsScaled) {
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");		assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
OffsetImm /= 2;		OffsetImm /= 2;
}		}

// Construct the new instruction.		// Construct the new instruction.
DebugLoc DL = I->getDebugLoc();		DebugLoc DL = I->getDebugLoc();
▲ Show 20 Lines • Show All 147 Lines • ▼ Show 20 Lines	#endif
}		}

// Insert our new paired instruction after whichever of the paired		// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.		// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;		MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
// Also based on MergeForward is from where we copy the base register operand		// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.		// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =		const MachineOperand &BaseRegOp =
MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);		MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
		: AArch64InstrInfo::getLdStBaseOp(*I);

int Offset = getLdStOffsetOp(*I).getImm();		int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
int PairedOffset = getLdStOffsetOp(*Paired).getImm();		int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());		bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
if (IsUnscaled != PairedIsUnscaled) {		if (IsUnscaled != PairedIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled. If		// We're trying to pair instructions that differ in how they are scaled. If
// I is scaled then scale the offset of Paired accordingly. Otherwise, do		// I is scaled then scale the offset of Paired accordingly. Otherwise, do
// the opposite (i.e., make Paired's offset unscaled).		// the opposite (i.e., make Paired's offset unscaled).
int MemSize = TII->getMemScale(*Paired);		int MemSize = TII->getMemScale(*Paired);
if (PairedIsUnscaled) {		if (PairedIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't		// If the unscaled offset isn't a multiple of the MemSize, we can't
Show All 18 Lines	if (Offset == PairedOffset + OffsetStride &&
// I.e., we turn ldp I, Paired into ldp Paired, I.		// I.e., we turn ldp I, Paired into ldp Paired, I.
// Update the index accordingly.		// Update the index accordingly.
if (SExtIdx != -1)		if (SExtIdx != -1)
SExtIdx = (SExtIdx + 1) % 2;		SExtIdx = (SExtIdx + 1) % 2;
} else {		} else {
RtMI = &*I;		RtMI = &*I;
Rt2MI = &*Paired;		Rt2MI = &*Paired;
}		}
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();		int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.		// Scale the immediate offset, if necessary.
if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {		if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&		assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
"Unscaled offset cannot be scaled.");		"Unscaled offset cannot be scaled.");
OffsetImm /= TII->getMemScale(*RtMI);		OffsetImm /= TII->getMemScale(*RtMI);
}		}

// Construct the new instruction.		// Construct the new instruction.
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines	if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
// FIXME: Currently we disable this transformation in big-endian targets as		// FIXME: Currently we disable this transformation in big-endian targets as
// performance and correctness are verified only in little-endian.		// performance and correctness are verified only in little-endian.
if (!Subtarget->isLittleEndian())		if (!Subtarget->isLittleEndian())
return NextI;		return NextI;
bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);		bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&		assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
"Unsupported ld/st match");		"Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");		assert(LoadSize <= StoreSize && "Invalid load size");
int UnscaledLdOffset = IsUnscaled		int UnscaledLdOffset =
? getLdStOffsetOp(*LoadI).getImm()		IsUnscaled
: getLdStOffsetOp(*LoadI).getImm() * LoadSize;		? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
int UnscaledStOffset = IsUnscaled		: AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
? getLdStOffsetOp(*StoreI).getImm()		int UnscaledStOffset =
: getLdStOffsetOp(*StoreI).getImm() * StoreSize;		IsUnscaled
		? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
		: AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;		int Width = LoadSize * 8;
Register DestReg =		Register DestReg =
IsStoreXReg ? Register(TRI->getMatchingSuperReg(		IsStoreXReg ? Register(TRI->getMatchingSuperReg(
LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))		LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
: LdRt;		: LdRt;

assert((UnscaledLdOffset >= UnscaledStOffset &&		assert((UnscaledLdOffset >= UnscaledStOffset &&
(UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&		(UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines
}		}

bool AArch64LoadStoreOpt::findMatchingStore(		bool AArch64LoadStoreOpt::findMatchingStore(
MachineBasicBlock::iterator I, unsigned Limit,		MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI) {		MachineBasicBlock::iterator &StoreI) {
MachineBasicBlock::iterator B = I->getParent()->begin();		MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;		MachineBasicBlock::iterator MBBI = I;
MachineInstr &LoadMI = *I;		MachineInstr &LoadMI = *I;
Register BaseReg = getLdStBaseOp(LoadMI).getReg();		Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

// If the load is the first instruction in the block, there's obviously		// If the load is the first instruction in the block, there's obviously
// not any matching store.		// not any matching store.
if (MBBI == B)		if (MBBI == B)
return false;		return false;

// Track which register units have been modified and used between the first		// Track which register units have been modified and used between the first
// insn and the second insn.		// insn and the second insn.
Show All 12 Lines	do {

// If the load instruction reads directly from the address to which the		// If the load instruction reads directly from the address to which the
// store instruction writes and the stored value is not modified, we can		// store instruction writes and the stored value is not modified, we can
// promote the load. Since we do not handle stores with pre-/post-index,		// promote the load. Since we do not handle stores with pre-/post-index,
// it's unnecessary to check if BaseReg is modified by the store itself.		// it's unnecessary to check if BaseReg is modified by the store itself.
// Also we can't handle stores without an immediate offset operand,		// Also we can't handle stores without an immediate offset operand,
// while the operand might be the address for a global variable.		// while the operand might be the address for a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&		if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&		BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
		AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&		isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {		ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;		StoreI = MBBI;
return true;		return true;
}		}

if (MI.isCall())		if (MI.isCall())
return false;		return false;
▲ Show 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;		MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;		MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;		MachineInstr &FirstMI = *I;
MBBI = next_nodbg(MBBI, E);		MBBI = next_nodbg(MBBI, E);

bool MayLoad = FirstMI.mayLoad();		bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);		bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
Register Reg = getLdStRegOp(FirstMI).getReg();		Register Reg = getLdStRegOp(FirstMI).getReg();
Register BaseReg = getLdStBaseOp(FirstMI).getReg();		Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();		int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;		int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);		bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

Optional<bool> MaybeCanRename = None;		Optional<bool> MaybeCanRename = None;
if (!EnableRenaming)		if (!EnableRenaming)
MaybeCanRename = {false};		MaybeCanRename = {false};

SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;		SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
Show All 18 Lines	for (unsigned Count = 0; MBBI != E && Count < Limit;

// Don't count transient instructions towards the search limit since there		// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.		// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())		if (!MI.isTransient())
++Count;		++Count;

Flags.setSExtIdx(-1);		Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&		if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
getLdStOffsetOp(MI).isImm()) {		AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
assert(MI.mayLoadOrStore() && "Expected memory operation.");		assert(MI.mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see		// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.		// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just		// These instructions all have scaled immediate operands, so we just
// check for +1/-1. Make sure to check the new instruction offset is		// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for		// actually an immediate and not a symbolic reference destined for
// a relocation.		// a relocation.
Register MIBaseReg = getLdStBaseOp(MI).getReg();		Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
int MIOffset = getLdStOffsetOp(MI).getImm();		int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);		bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
if (IsUnscaled != MIIsUnscaled) {		if (IsUnscaled != MIIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled.		// We're trying to pair instructions that differ in how they are scaled.
// If FirstMI is scaled then scale the offset of MI accordingly.		// If FirstMI is scaled then scale the offset of MI accordingly.
// Otherwise, do the opposite (i.e., make MI's offset unscaled).		// Otherwise, do the opposite (i.e., make MI's offset unscaled).
int MemSize = TII->getMemScale(MI);		int MemSize = TII->getMemScale(MI);
if (MIIsUnscaled) {		if (MIIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't		// If the unscaled offset isn't a multiple of the MemSize, we can't
Show All 14 Lines	if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&

if (BaseReg == MIBaseReg) {		if (BaseReg == MIBaseReg) {
// If the offset of the second ld/st is not equal to the size of the		// If the offset of the second ld/st is not equal to the size of the
// destination register it can’t be paired with a pre-index ld/st		// destination register it can’t be paired with a pre-index ld/st
// pair. Additionally if the base reg is used or modified the operations		// pair. Additionally if the base reg is used or modified the operations
// can't be paired: bail and keep looking.		// can't be paired: bail and keep looking.
if (IsPreLdSt) {		if (IsPreLdSt) {
bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);		bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
bool IsBaseRegUsed =		bool IsBaseRegUsed = !UsedRegUnits.available(
!UsedRegUnits.available(getLdStBaseOp(MI).getReg());		AArch64InstrInfo::getLdStBaseOp(MI).getReg());
bool IsBaseRegModified =		bool IsBaseRegModified = !ModifiedRegUnits.available(
!ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());		AArch64InstrInfo::getLdStBaseOp(MI).getReg());
// If the stored value and the address of the second instruction is		// If the stored value and the address of the second instruction is
// the same, it needs to be using the updated register and therefore		// the same, it needs to be using the updated register and therefore
// it must not be folded.		// it must not be folded.
bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),		bool IsMIRegTheSame =
getLdStBaseOp(MI).getReg());		TRI->regsOverlap(getLdStRegOp(MI).getReg(),
		AArch64InstrInfo::getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||		if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {		IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,		LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);		UsedRegUnits, TRI);
MemInsns.push_back(&MI);		MemInsns.push_back(&MI);
continue;		continue;
}		}
} else {		} else {
▲ Show 20 Lines • Show All 136 Lines • ▼ Show 20 Lines

static MachineBasicBlock::iterator		static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {		maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
auto End = MI.getParent()->end();		auto End = MI.getParent()->end();
if (MaybeCFI == End ||		if (MaybeCFI == End ||
MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||		MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
!(MI.getFlag(MachineInstr::FrameSetup) ||		!(MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)) ||		MI.getFlag(MachineInstr::FrameDestroy)) ||
getLdStBaseOp(MI).getReg() != AArch64::SP)		AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
return End;		return End;

const MachineFunction &MF = *MI.getParent()->getParent();		const MachineFunction &MF = *MI.getParent()->getParent();
unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();		unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];		const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
switch (CFI.getOperation()) {		switch (CFI.getOperation()) {
case MCCFIInstruction::OpDefCfa:		case MCCFIInstruction::OpDefCfa:
case MCCFIInstruction::OpDefCfaOffset:		case MCCFIInstruction::OpDefCfaOffset:
Show All 30 Lines	AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
if (Update->getOpcode() == AArch64::SUBXri)		if (Update->getOpcode() == AArch64::SUBXri)
Value = -Value;		Value = -Value;

unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())		unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
: getPostIndexedOpcode(I->getOpcode());		: getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB;		MachineInstrBuilder MIB;
int Scale, MinOffset, MaxOffset;		int Scale, MinOffset, MaxOffset;
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);		getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!isPairedLdSt(*I)) {		if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.		// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))		MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))		.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I))		.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))		.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)		.addImm(Value / Scale)
.setMemRefs(I->memoperands())		.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));		.setMIFlags(I->mergeFlagsWith(*Update));
} else {		} else {
// Paired instruction.		// Paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))		MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))		.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I, 0))		.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))		.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))		.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)		.addImm(Value / Scale)
.setMemRefs(I->memoperands())		.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));		.setMIFlags(I->mergeFlagsWith(*Update));
}		}
if (CFI != E) {		if (CFI != E) {
MachineBasicBlock *MBB = I->getParent();		MachineBasicBlock *MBB = I->getParent();
MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);		MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
}		}
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines
}		}

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(		MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {		MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();		MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;		MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;		MachineBasicBlock::iterator MBBI = I;

Register BaseReg = getLdStBaseOp(MemMI).getReg();		Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);		int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
		TII->getMemScale(MemMI);

// Scan forward looking for post-index opportunities. Updating instructions		// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're		// can't be formed if the memory instruction doesn't have the offset we're
// looking for.		// looking for.
if (MIUnscaledOffset != UnscaledOffset)		if (MIUnscaledOffset != UnscaledOffset)
return E;		return E;

// If the base register overlaps a source/destination register, we can't		// If the base register overlaps a source/destination register, we can't
// merge the update. This does not apply to tag store instructions which		// merge the update. This does not apply to tag store instructions which
// ignore the address part of the source register.		// ignore the address part of the source register.
// This does not apply to STGPi as well, which does not have unpredictable		// This does not apply to STGPi as well, which does not have unpredictable
// behavior in this case unlike normal stores, and always performs writeback		// behavior in this case unlike normal stores, and always performs writeback
// after reading the source register value.		// after reading the source register value.
if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {		if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
bool IsPairedInsn = isPairedLdSt(MemMI);		bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {		for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();		Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg \|\| TRI->isSubRegister(BaseReg, DestReg))		if (DestReg == BaseReg \|\| TRI->isSubRegister(BaseReg, DestReg))
return E;		return E;
}		}
}		}

// Track which register units have been modified and used between the first		// Track which register units have been modified and used between the first
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(		MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator I, unsigned Limit) {		MachineBasicBlock::iterator I, unsigned Limit) {
MachineBasicBlock::iterator B = I->getParent()->begin();		MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();		MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;		MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;		MachineBasicBlock::iterator MBBI = I;
MachineFunction &MF = *MemMI.getMF();		MachineFunction &MF = *MemMI.getMF();

Register BaseReg = getLdStBaseOp(MemMI).getReg();		Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int Offset = getLdStOffsetOp(MemMI).getImm();		int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

// If the load/store is the first instruction in the block, there's obviously		// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.		// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B \|\| Offset != 0)		if (MBBI == B \|\| Offset != 0)
return E;		return E;
// If the base register overlaps a destination register, we can't		// If the base register overlaps a destination register, we can't
// merge the update.		// merge the update.
if (!isTagStore(MemMI)) {		if (!isTagStore(MemMI)) {
bool IsPairedInsn = isPairedLdSt(MemMI);		bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {		for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();		Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg \|\| TRI->isSubRegister(BaseReg, DestReg))		if (DestReg == BaseReg \|\| TRI->isSubRegister(BaseReg, DestReg))
return E;		return E;
}		}
}		}

const bool BaseRegSP = BaseReg == AArch64::SP;		const bool BaseRegSP = BaseReg == AArch64::SP;
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {		MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;		MachineInstr &MI = *MBBI;
// If this is a volatile load, don't mess with it.		// If this is a volatile load, don't mess with it.
if (MI.hasOrderedMemoryRef())		if (MI.hasOrderedMemoryRef())
return false;		return false;

// Make sure this is a reg+imm.		// Make sure this is a reg+imm.
// FIXME: It is possible to extend it to handle reg+reg cases.		// FIXME: It is possible to extend it to handle reg+reg cases.
if (!getLdStOffsetOp(MI).isImm())		if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;		return false;

// Look backward up to LdStLimit instructions.		// Look backward up to LdStLimit instructions.
MachineBasicBlock::iterator StoreI;		MachineBasicBlock::iterator StoreI;
if (findMatchingStore(MBBI, LdStLimit, StoreI)) {		if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
++NumLoadsFromStoresPromoted;		++NumLoadsFromStoresPromoted;
// Promote the load. Keeping the iterator straight is a		// Promote the load. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction		// pain, so we let the merge routine tell us what the next instruction
Show All 37 Lines	bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {

if (!TII->isCandidateToMergeOrPair(MI))		if (!TII->isCandidateToMergeOrPair(MI))
return false;		return false;

// Early exit if the offset is not possible to match. (6 bits of positive		// Early exit if the offset is not possible to match. (6 bits of positive
// range, plus allow an extra one in case we find a later insn that matches		// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)		// with Offset-1)
bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);		bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
int Offset = getLdStOffsetOp(MI).getImm();		int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;		int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.		// Allow one more for offset.
if (Offset > 0)		if (Offset > 0)
Offset -= OffsetStride;		Offset -= OffsetStride;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))		if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return false;		return false;

// Look ahead up to LdStLimit instructions for a pairable instruction.		// Look ahead up to LdStLimit instructions for a pairable instruction.
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines	if (Update != E) {
// Merge the update into the ld/st.		// Merge the update into the ld/st.
MBBI = mergeUpdateInsn(MBBI, Update, /IsPreIdx=/true);		MBBI = mergeUpdateInsn(MBBI, Update, /IsPreIdx=/true);
return true;		return true;
}		}

// The immediate in the load/store is scaled by the size of the memory		// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,		// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.		// however, is not, so adjust here.
int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);		int UnscaledOffset =
		AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

// Look forward to try to find a pre-index instruction. For example,		// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]		// ldr x1, [x0, #64]
// add x0, x0, #64		// add x0, x0, #64
// merged into:		// merged into:
// ldr x1, [x0, #64]!		// ldr x1, [x0, #64]!
Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);		Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
if (Update != E) {		if (Update != E) {
▲ Show 20 Lines • Show All 130 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp

	//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//			//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "AArch64MachineScheduler.h"			#include "AArch64MachineScheduler.h"
				#include "AArch64InstrInfo.h"
				#include "AArch64Subtarget.h"
	#include "MCTargetDesc/AArch64MCTargetDesc.h"			#include "MCTargetDesc/AArch64MCTargetDesc.h"

	using namespace llvm;			using namespace llvm;

				// Decide whether MI is a store that tryCandidate may order by address.
				// Returns false for a null MI and for every opcode other than STURQi,
				// STRQui and STPQi. For those three opcodes the result is true only when
				// the offset operand is an immediate.
				// NOTE(review): single Q stores (STURQi/STRQui) are rejected when
				// isStoreAddressAscend() returns true -- confirm this polarity against
				// the FeatureAscendStoreAddress definition in AArch64.td.
				static bool needReorderStoreMI(const MachineInstr *MI) {
				if (!MI)
				return false;

				switch (MI->getOpcode()) {
				default:
				return false;
				case AArch64::STURQi:
				case AArch64::STRQui:
				if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
				return false;
				LLVM_FALLTHROUGH;
				case AArch64::STPQi:
				// The offset operand is not always an immediate: per the review thread
				// below, a store such as "str q0, [x8, :lo12:seed_lo]" carries a symbol
				// reference, and calling getImm() on it would crash.
				return AArch64InstrInfo::getLdStOffsetOp(*MI).getType() == MachineOperand::MO_Immediate;
				dmgreenUnsubmitted Not Done Reply Inline Actions Why do we need to check it is an immediate? dmgreen: Why do we need to check it is an immediate?
				AllenAuthorUnsubmitted Done Reply Inline Actions this is because some IR used to match the str q0, [x8, :lo12:seed_lo] for example, and we can't get its offset Allen: this is because some IR used to match the str q0, [x8, :lo12:seed_lo] for example, and we…
				dmgreenUnsubmitted Not Done Reply Inline Actions Why is this needed? dmgreen: Why is this needed?
				AllenAuthorUnsubmitted Done Reply Inline Actions if we don't check this, it will crash to use getImm() get the immediate with the above str q0, [x8, :lo12:seed_lo] Allen: if we don't check this, it will crash to use getImm() get the immediate with the above…
				dmgreenUnsubmitted Not Done Reply Inline Actions OK I see, it is a TargetFlags. That makes sense. It doesn't look like we ever generate them for STQP. dmgreen: OK I see, it is a TargetFlags. That makes sense. It doesn't look like we ever generate them for…
				}

				return false;
				}

				// Return true if two stores with same base address may overlap writes
				static bool mayOverlapWrite(const MachineInstr &MI0, const MachineInstr &MI1,
				int64_t &Off0, int64_t &Off1) {
				const MachineOperand &Base0 = AArch64InstrInfo::getLdStBaseOp(MI0);
				const MachineOperand &Base1 = AArch64InstrInfo::getLdStBaseOp(MI1);

				// May overlapping writes if two store instructions without same base
				if (!Base0.isIdenticalTo(Base1))
				return true;

				int StoreSize0 = AArch64InstrInfo::getMemScale(MI0);
				int StoreSize1 = AArch64InstrInfo::getMemScale(MI1);
				Off0 = AArch64InstrInfo::hasUnscaledLdStOffset(MI0.getOpcode())
				? AArch64InstrInfo::getLdStOffsetOp(MI0).getImm()
				: AArch64InstrInfo::getLdStOffsetOp(MI0).getImm() * StoreSize0;
				Off1 = AArch64InstrInfo::hasUnscaledLdStOffset(MI1.getOpcode())
				? AArch64InstrInfo::getLdStOffsetOp(MI1).getImm()
				: AArch64InstrInfo::getLdStOffsetOp(MI1).getImm() * StoreSize1;

				const MachineInstr &MI = (Off0 < Off1) ? MI0 : MI1;
				int Multiples = AArch64InstrInfo::isPairedLdSt(MI) ? 2 : 1;
				int StoreSize = AArch64InstrInfo::getMemScale(MI) * Multiples;

				return llabs(Off0 - Off1) < StoreSize;
				}

	bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,			bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
	SchedCandidate &TryCand) {			SchedCandidate &TryCand) {
	bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);			bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);

	if (Cand.isValid()) {			if (Cand.isValid()) {
	MachineInstr *Instr0 = TryCand.SU->getInstr();			MachineInstr *Instr0 = TryCand.SU->getInstr();
	MachineInstr *Instr1 = Cand.SU->getInstr();			MachineInstr *Instr1 = Cand.SU->getInstr();
	// When dealing with two STPqi's.
	if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode () &&			if (!needReorderStoreMI(Instr0) \|\| !needReorderStoreMI(Instr1))
	Instr0->getOpcode() == AArch64::STPQi)			return OriginalResult;
	{
	MachineOperand &Base0 = Instr0->getOperand(2);			int64_t Off0, Off1;
	MachineOperand &Base1 = Instr1->getOperand(2);
	int64_t Off0 = Instr0->getOperand(3).getImm();
	int64_t Off1 = Instr1->getOperand(3).getImm();
	// With the same base address and non-overlapping writes.			// With the same base address and non-overlapping writes.
	if (Base0.isIdenticalTo(Base1) && llabs (Off0 - Off1) >= 2) {			if (!mayOverlapWrite(Instr0, Instr1, Off0, Off1)) {
	TryCand.Reason = NodeOrder;			TryCand.Reason = NodeOrder;
	// Order them by ascending offsets.			// Order them by ascending offsets.
	return Off0 < Off1;			return Off0 < Off1;
	}			}
	}			}
	}

	return OriginalResult;			return OriginalResult;
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[MachineScheduler] Order more stores by ascending address
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 436308

llvm/lib/Target/AArch64/AArch64.td

llvm/lib/Target/AArch64/AArch64InstrInfo.h

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[MachineScheduler] Order more stores by ascending address
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 436308

llvm/lib/Target/AArch64/AArch64.td

llvm/lib/Target/AArch64/AArch64InstrInfo.h

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp

[MachineScheduler] Order more stores by ascending address
ClosedPublic