Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -1192,6 +1192,23 @@
     return nullptr;
   }
 
+  /// areMemAccessesTriviallyDiff - Sometimes the target can tell, even
+  /// without aliasing information, that two MIs access different memory
+  /// addresses. Return true if the two accesses are known to be disjoint,
+  /// and false otherwise.
+  virtual bool areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                           MachineInstr *MIb) const {
+    return false;
+  }
+
+  /// For instructions with a base and offset, return the position of the
+  /// base register and offset operands.
+  virtual bool getBaseAndOffsetPosition(const MachineInstr *MI,
+                                        unsigned &BasePos,
+                                        unsigned &OffsetPos) const {
+    return false;
+  }
+
 private:
   int CallFrameSetupOpcode, CallFrameDestroyOpcode;
 };
Index: lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- lib/CodeGen/ScheduleDAGInstrs.cpp
+++ lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -511,10 +511,16 @@
 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                              MachineInstr *MIa, MachineInstr *MIb) {
+  const MachineFunction *MF = MIa->getParent()->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
   // Cover a trivial case - no edge is needed to itself.
   if (MIa == MIb)
     return false;
 
+  if (TII->areMemAccessesTriviallyDiff(MIa, MIb))
+    return false;
+
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
     return true;
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -52,6 +52,9 @@
   bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                              unsigned &DstReg, unsigned &SubIdx) const override;
 
+  bool areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                   MachineInstr *MIb) const override;
+
   unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                int &FrameIndex) const override;
   unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -90,6 +93,10 @@
                              unsigned &Offset,
                              const TargetRegisterInfo *TRI) const override;
 
+  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
+                                 int &Offset, int &Width,
+                                 const TargetRegisterInfo *TRI) const;
+
   bool enableClusterLoads() const override { return true; }
 
   bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -607,6 +607,37 @@
   }
 }
 
+bool AArch64InstrInfo::areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                                   MachineInstr *MIb) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned BaseRegA = 0, BaseRegB = 0;
+  int OffsetA = 0, OffsetB = 0;
+  int WidthA = 0, WidthB = 0;
+
+  if (!MIa->mayStore() && !MIa->mayLoad())
+    return false;
+  if (!MIb->mayStore() && !MIb->mayLoad())
+    return false;
+
+  // Retrieve the base register, the offset from the base register, and the
+  // width. The width is the size of memory that is being loaded/stored
+  // (e.g. 1, 2, 4 or 8 bytes). If the base registers are identical, and the
+  // offset of the lower memory access plus its width does not reach the
+  // offset of the higher memory access, then the memory accesses are
+  // different.
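+  // For example (illustrative values): an 8-byte store at [x0, #8] and a
+  // 4-byte load at [x0, #16] cannot overlap, since 8 + 8 <= 16; the same
+  // store and a load at [x0, #12] would overlap.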
If + // base registers are identical, and the offset of a lower memory access + + // the width doesn't overlap the offset of a higher memory access, + // then the memory access are different. + if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && + getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { + if (BaseRegA == BaseRegB) { + int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; + int HighOffset = (LowOffset == OffsetA) ? OffsetB : OffsetA; + int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowOffset + LowWidth <= HighOffset) + return true; + } + } + return false; +} + + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. /// Return true if the comparison instruction can be analyzed. @@ -1270,6 +1301,101 @@ }; } + +bool +AArch64InstrInfo::getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, + unsigned &BaseReg, int &Offset, + int &Width, const TargetRegisterInfo *TRI) const { + // Offset is calculated as the immediate operand multiplied by the scaling factor. + // Unscaled instructions have scaling factor set to 1. + int Scale = 0; + switch (LdSt->getOpcode()) { + default: + return false; + case AArch64::LDURQi: + case AArch64::STURQi: + Width = 16; + Scale = 1; + break; + case AArch64::LDURXi: + case AArch64::LDURDi: + case AArch64::STURXi: + case AArch64::STURDi: + Width = 8; + Scale = 1; + break; + case AArch64::LDURWi: + case AArch64::LDURSi: + case AArch64::LDURSWi: + case AArch64::STURWi: + case AArch64::STURSi: + Width = 4; + Scale = 1; + break; + case AArch64::LDURHi: + case AArch64::LDURHHi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::STURHi: + case AArch64::STURHHi: + Width = 2; + Scale = 1; + break; + case AArch64::LDURBi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::STURBi: + case AArch64::STURBBi: + Width = 1; + Scale = 1; + break; + case AArch64::LDRXui: + case AArch64::STRXui: + Scale = Width = 8; + break; + case AArch64::LDRWui: + case AArch64::STRWui: + Scale = Width = 4; + break; + case AArch64::LDRBui: + case AArch64::STRBui: + Scale = Width = 1; + break; + case AArch64::LDRHui: + case AArch64::STRHui: + Scale = Width = 2; + break; + case AArch64::LDRSui: + case AArch64::STRSui: + Scale = Width = 4; + break; + case AArch64::LDRDui: + case AArch64::STRDui: + Scale = Width = 8; + break; + case AArch64::LDRQui: + case AArch64::STRQui: + Scale = Width = 16; + break; + case AArch64::LDRBBui: + case AArch64::STRBBui: + Scale = Width = 1; + break; + case AArch64::LDRHHui: + case AArch64::STRHHui: + Scale = Width = 2; + break; + }; + + if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) + return false; + + BaseReg = LdSt->getOperand(1).getReg(); + Offset = LdSt->getOperand(2).getImm() * Scale; + return true; +} + /// Detect opportunities for ldp/stp formation. /// /// Only called for LdSt for which getLdStBaseRegImmOfs returns true. Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -111,6 +111,8 @@ bool isCortexA57() const { return CPUString == "cortex-a57"; } bool isCortexA53() const { return CPUString == "cortex-a53"; } + bool useAA() const override { return true; } + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. 
+  int Scale = 0;
+  switch (LdSt->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDURQi:
+  case AArch64::STURQi:
+    Width = 16;
+    Scale = 1;
+    break;
+  case AArch64::LDURXi:
+  case AArch64::LDURDi:
+  case AArch64::STURXi:
+  case AArch64::STURDi:
+    Width = 8;
+    Scale = 1;
+    break;
+  case AArch64::LDURWi:
+  case AArch64::LDURSi:
+  case AArch64::LDURSWi:
+  case AArch64::STURWi:
+  case AArch64::STURSi:
+    Width = 4;
+    Scale = 1;
+    break;
+  case AArch64::LDURHi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURSHXi:
+  case AArch64::LDURSHWi:
+  case AArch64::STURHi:
+  case AArch64::STURHHi:
+    Width = 2;
+    Scale = 1;
+    break;
+  case AArch64::LDURBi:
+  case AArch64::LDURBBi:
+  case AArch64::LDURSBXi:
+  case AArch64::LDURSBWi:
+  case AArch64::STURBi:
+  case AArch64::STURBBi:
+    Width = 1;
+    Scale = 1;
+    break;
+  case AArch64::LDRXui:
+  case AArch64::STRXui:
+    Scale = Width = 8;
+    break;
+  case AArch64::LDRWui:
+  case AArch64::STRWui:
+    Scale = Width = 4;
+    break;
+  case AArch64::LDRBui:
+  case AArch64::STRBui:
+    Scale = Width = 1;
+    break;
+  case AArch64::LDRHui:
+  case AArch64::STRHui:
+    Scale = Width = 2;
+    break;
+  case AArch64::LDRSui:
+  case AArch64::STRSui:
+    Scale = Width = 4;
+    break;
+  case AArch64::LDRDui:
+  case AArch64::STRDui:
+    Scale = Width = 8;
+    break;
+  case AArch64::LDRQui:
+  case AArch64::STRQui:
+    Scale = Width = 16;
+    break;
+  case AArch64::LDRBBui:
+  case AArch64::STRBBui:
+    Scale = Width = 1;
+    break;
+  case AArch64::LDRHHui:
+  case AArch64::STRHHui:
+    Scale = Width = 2;
+    break;
+  }
+
+  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+    return false;
+
+  BaseReg = LdSt->getOperand(1).getReg();
+  Offset = LdSt->getOperand(2).getImm() * Scale;
+  return true;
+}
+
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -111,6 +111,8 @@
   bool isCortexA57() const { return CPUString == "cortex-a57"; }
   bool isCortexA53() const { return CPUString == "cortex-a53"; }
 
+  bool useAA() const override { return true; }
+
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
   unsigned getMaxInlineSizeThreshold() const { return 64; }
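
Note: the new getBaseAndOffsetPosition() hook is declared above, but this patch
does not include a target override for it. As a rough sketch only
(MyTargetInstrInfo and the operand indices below are hypothetical, mirroring the
operand-1/operand-2 layout that getLdStBaseRegImmOfsWidth() checks above), an
override for a target whose loads and stores use a simple reg+imm form might
look like:

  bool MyTargetInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
                                                   unsigned &BasePos,
                                                   unsigned &OffsetPos) const {
    // Only memory instructions have a base + offset form.
    if (!MI->mayLoad() && !MI->mayStore())
      return false;
    // Assume operand 0 is the transferred value, operand 1 the base
    // register, and operand 2 the immediate offset.
    BasePos = 1;
    OffsetPos = 2;
    return MI->getOperand(BasePos).isReg() &&
           MI->getOperand(OffsetPos).isImm();
  }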