Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -1192,6 +1192,30 @@
     return nullptr;
   }
 
+  /// areMemAccessesTriviallyDiff - Sometimes, it is possible for the target to
+  /// tell, even without aliasing information, that two MIs access different
+  /// memory addresses. This function returns true if the two MIs provably
+  /// access non-overlapping memory, and false otherwise. A false result only
+  /// means "unknown"; it does not imply that the accesses overlap.
+  ///
+  /// The scheduler uses a true result to omit the chain edge between MIa and
+  /// MIb, so overrides must return false for instructions with ordered (e.g.
+  /// volatile) memory references or unmodeled side effects. The default
+  /// implementation conservatively returns false.
+  virtual bool areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                           MachineInstr *MIb) const {
+    return false;
+  }
+
+  /// For instructions with a base and offset, return the position of the
+  /// base register and offset operands. The default implementation returns
+  /// false and leaves BasePos and OffsetPos unmodified.
+  virtual bool getBaseAndOffsetPosition(const MachineInstr *MI,
+                                        unsigned &BasePos,
+                                        unsigned &OffsetPos) const {
+    return false;
+  }
+
 private:
   int CallFrameSetupOpcode, CallFrameDestroyOpcode;
 };
Index: lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- lib/CodeGen/ScheduleDAGInstrs.cpp
+++ lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -511,10 +511,18 @@
 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                              MachineInstr *MIa,
                              MachineInstr *MIb) {
+  const MachineFunction *MF = MIa->getParent()->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
   // Cover a trivial case - no edge is need to itself.
   if (MIa == MIb)
     return false;
 
+  // Let the target decide if the memory accesses cannot possibly overlap. The
+  // hook is responsible for returning false for ordered/volatile accesses.
+  if (TII->areMemAccessesTriviallyDiff(MIa, MIb))
+    return false;
+
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
     return true;
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -52,6 +52,9 @@
   bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                              unsigned &DstReg, unsigned &SubIdx) const override;
 
+  bool areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                   MachineInstr *MIb) const override;
+
   unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                int &FrameIndex) const override;
   unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -90,6 +93,10 @@
                             unsigned &Offset,
                             const TargetRegisterInfo *TRI) const override;
 
+  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
+                                 int &Offset, int &Width,
+                                 const TargetRegisterInfo *TRI) const;
+
   bool enableClusterLoads() const override { return true; }
 
   bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -607,6 +607,51 @@
   }
 }
 
+/// Return true if MIa and MIb are loads/stores that provably access
+/// non-overlapping memory: both address memory as the same base register
+/// plus an immediate offset, and the lower access ends at or before the
+/// offset where the higher access begins. Return false whenever that cannot
+/// be established.
+bool AArch64InstrInfo::areMemAccessesTriviallyDiff(MachineInstr *MIa,
+                                                   MachineInstr *MIb) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned BaseRegA = 0, BaseRegB = 0;
+  int OffsetA = 0, OffsetB = 0;
+  int WidthA = 0, WidthB = 0;
+
+  if (!MIa->mayStore() && !MIa->mayLoad())
+    return false;
+  if (!MIb->mayStore() && !MIb->mayLoad())
+    return false;
+
+  // Ordered (e.g. volatile) accesses and instructions with unmodeled side
+  // effects must keep their relative order even when the addresses differ,
+  // so never report them as trivially different.
+  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, offset from the base register and width. Width
+  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
+  // base registers are identical, and the offset of a lower memory access +
+  // the width doesn't overlap the offset of a higher memory access,
+  // then the memory accesses are different.
+  // NOTE(review): this compares register numbers only, so it presumes the base
+  // register holds the same value at both instructions - confirm for callers.
+  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
+      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
+    if (BaseRegA == BaseRegB) {
+      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+      int HighOffset = (LowOffset == OffsetA) ? OffsetB : OffsetA;
+      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowOffset + LowWidth <= HighOffset)
+        return true;
+    }
+  }
+  return false;
+}
+
+
 /// analyzeCompare - For a comparison instruction, return the source registers
 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
 /// Return true if the comparison instruction can be analyzed.
@@ -1270,6 +1315,105 @@
   };
 }
 
+
+/// Decompose a load/store that addresses memory as base register + immediate.
+/// For the opcodes handled below, sets Width to the size of the access and,
+/// if operand 1 is a register and operand 2 an immediate, sets BaseReg and
+/// Offset (the immediate times the opcode's scale factor) and returns true.
+/// Returns false otherwise. TRI is currently unused.
+bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
+    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
+    const TargetRegisterInfo *TRI) const {
+  // Offset is calculated as the immediate operand multiplied by the scaling
+  // factor. Unscaled instructions have scaling factor set to 1.
+  int Scale = 0;
+  switch (LdSt->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDURQi:
+  case AArch64::STURQi:
+    Width = 16;
+    Scale = 1;
+    break;
+  case AArch64::LDURXi:
+  case AArch64::LDURDi:
+  case AArch64::STURXi:
+  case AArch64::STURDi:
+    Width = 8;
+    Scale = 1;
+    break;
+  case AArch64::LDURWi:
+  case AArch64::LDURSi:
+  case AArch64::LDURSWi:
+  case AArch64::STURWi:
+  case AArch64::STURSi:
+    Width = 4;
+    Scale = 1;
+    break;
+  case AArch64::LDURHi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURSHXi:
+  case AArch64::LDURSHWi:
+  case AArch64::STURHi:
+  case AArch64::STURHHi:
+    Width = 2;
+    Scale = 1;
+    break;
+  case AArch64::LDURBi:
+  case AArch64::LDURBBi:
+  case AArch64::LDURSBXi:
+  case AArch64::LDURSBWi:
+  case AArch64::STURBi:
+  case AArch64::STURBBi:
+    Width = 1;
+    Scale = 1;
+    break;
+  case AArch64::LDRXui:
+  case AArch64::STRXui:
+    Scale = Width = 8;
+    break;
+  case AArch64::LDRWui:
+  case AArch64::STRWui:
+    Scale = Width = 4;
+    break;
+  case AArch64::LDRBui:
+  case AArch64::STRBui:
+    Scale = Width = 1;
+    break;
+  case AArch64::LDRHui:
+  case AArch64::STRHui:
+    Scale = Width = 2;
+    break;
+  case AArch64::LDRSui:
+  case AArch64::STRSui:
+    Scale = Width = 4;
+    break;
+  case AArch64::LDRDui:
+  case AArch64::STRDui:
+    Scale = Width = 8;
+    break;
+  case AArch64::LDRQui:
+  case AArch64::STRQui:
+    Scale = Width = 16;
+    break;
+  case AArch64::LDRBBui:
+  case AArch64::STRBBui:
+    Scale = Width = 1;
+    break;
+  case AArch64::LDRHHui:
+  case AArch64::STRHHui:
+    Scale = Width = 2;
+    break;
+  }
+
+  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+    return false;
+
+  BaseReg = LdSt->getOperand(1).getReg();
+  Offset = LdSt->getOperand(2).getImm() * Scale;
+  return true;
+}
+
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -111,6 +111,8 @@
   bool isCortexA57() const { return CPUString == "cortex-a57"; }
   bool isCortexA53() const { return CPUString == "cortex-a53"; }
 
+  bool useAA() const override { return true; }
+
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
   unsigned getMaxInlineSizeThreshold() const { return 64; }
Index: test/CodeGen/AArch64/arm64-triv-diff-mem-access.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-triv-diff-mem-access.ll
@@ -0,0 +1,24 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; Check that there is no edge created between stores that use the same base
+; register, and whose memory accesses do not overlap.
+
+; CHECK: Reject chain dep
+; Function Attrs: nounwind
+define void @func(i32* nocapture %a, i32 %i, i32 %j) #0 {
+entry:
+  store i32 %i, i32* %a, align 4, !tbaa !1
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  store i32 %j, i32* %arrayidx1, align 4, !tbaa !1
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.6.0 "}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}