Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1961,6 +1961,7 @@
     static char ID;
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
 
+    AliasAnalysis *AA;
     const DataLayout *TD;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
@@ -1974,6 +1975,11 @@
       return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
     }
 
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<AAResultsWrapperPass>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
   private:
     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                           unsigned &NewOpc, unsigned &EvenReg,
@@ -2003,6 +2009,7 @@
   TRI = STI->getRegisterInfo();
   MRI = &Fn.getRegInfo();
   MF = &Fn;
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   bool Modified = false;
   for (MachineBasicBlock &MFI : Fn)
@@ -2016,28 +2023,19 @@
                                       MachineBasicBlock::iterator E,
                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
                                       SmallSet<unsigned, 4> &MemRegs,
-                                      const TargetRegisterInfo *TRI) {
+                                      const TargetRegisterInfo *TRI,
+                                      AliasAnalysis *AA) {
   // Are there stores / loads / calls between them?
-  // FIXME: This is overly conservative. We should make use of alias information
-  // some day.
   SmallSet<unsigned, 4> AddedRegPressure;
   while (++I != E) {
     if (I->isDebugValue() || MemOps.count(&*I))
       continue;
     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
       return false;
-    if (isLd && I->mayStore())
-      return false;
-    if (!isLd) {
-      if (I->mayLoad())
-        return false;
-      // It's not safe to move the first 'str' down.
-      // str r1, [r0]
-      // strh r5, [r0]
-      // str r4, [r0, #+4]
-      if (I->mayStore())
-        return false;
-    }
+    if (I->mayStore() || (!isLd && I->mayLoad()))
+      for (MachineInstr *MemOp : MemOps)
+        if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
+          return false;
     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
       MachineOperand &MO = I->getOperand(j);
       if (!MO.isReg())
@@ -2212,7 +2210,7 @@
       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
       if (DoMove)
         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
-                                           MemOps, MemRegs, TRI);
+                                           MemOps, MemRegs, TRI, AA);
       if (!DoMove) {
         for (unsigned i = 0; i != NumMove; ++i)
           Ops.pop_back();
Index: test/CodeGen/ARM/ldrd.ll
===================================================================
--- test/CodeGen/ARM/ldrd.ll
+++ test/CodeGen/ARM/ldrd.ll
@@ -189,5 +189,23 @@
   ret i32* %p1
 }
 
+; CHECK-LABEL: ldrd_strd_aa:
+; NORMAL: ldrd [[TMP1:r[0-9]]], [[TMP2:r[0-9]]],
+; NORMAL: strd [[TMP1]], [[TMP2]],
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: strd
+; CHECK: bx lr
+
+define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) {
+entry:
+  %0 = load i32, i32* %y, align 4
+  store i32 %0, i32* %x, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1
+  store i32 %1, i32* %arrayidx3, align 4
+  ret void
+}
+
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
Index: test/CodeGen/ARM/prera-ldst-aliasing.mir
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/prera-ldst-aliasing.mir
@@ -0,0 +1,40 @@
+# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s
+--- |
+  target triple = "thumbv7---eabi"
+
+  define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) {
+  entry:
+    %0 = load i32, i32* %y, align 4
+    store i32 %0, i32* %x, align 4
+    %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1
+    %1 = load i32, i32* %arrayidx2, align 4
+    %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1
+    store i32 %1, i32* %arrayidx3, align 4
+    ret void
+  }
+...
+---
+name: ldrd_strd_aa
+alignment: 1
+tracksRegLiveness: true
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+  - { reg: '%r1', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: %r0, %r1
+
+    %1 : gpr = COPY %r1
+    %0 : gpr = COPY %r0
+    %2 : gpr = t2LDRi12 %1, 0, 14, _ :: (load 4 from %ir.y)
+    t2STRi12 killed %2, %0, 0, 14, _ :: (store 4 into %ir.x)
+    %3 : gpr = t2LDRi12 %1, 4, 14, _ :: (load 4 from %ir.arrayidx2)
+    t2STRi12 killed %3, %0, 4, 14, _ :: (store 4 into %ir.arrayidx3)
+    ; CHECK: t2LDRi12
+    ; CHECK-NEXT: t2LDRi12
+    ; CHECK-NEXT: t2STRi12
+    ; CHECK-NEXT: t2STRi12
+    tBX_RET 14, _
+
+...
+