In the pre-RA load/store rescheduling loop, only let an instruction update
FirstOp/LastOp (and FirstLoc/LastLoc) after it has passed the opcode and
offset checks.  The move limit is tested before NumMove is incremented so
that the count never includes an instruction whose information was not saved.
Adds a MIR test checking that the paired stores end up next to each other.

Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2162,33 +2162,42 @@
     unsigned LastBytes = 0;
     unsigned NumMove = 0;
     for (int i = Ops.size() - 1; i >= 0; --i) {
+      // Make sure each operation has the same kind.
       MachineInstr *Op = Ops[i];
-      unsigned Loc = MI2LocMap[Op];
-      if (Loc <= FirstLoc) {
-        FirstLoc = Loc;
-        FirstOp = Op;
-      }
-      if (Loc >= LastLoc) {
-        LastLoc = Loc;
-        LastOp = Op;
-      }
-
       unsigned LSMOpcode
         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
       if (LastOpcode && LSMOpcode != LastOpcode)
         break;
 
+      // Check that we have a continuous set of offsets.
       int Offset = getMemoryOpOffset(*Op);
       unsigned Bytes = getLSMultipleTransferSize(Op);
       if (LastBytes) {
         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
           break;
       }
+
+      // Don't try to reschedule too many instructions.  Test the limit
+      // before counting Op, so that every instruction included in NumMove
+      // also has its offset and location recorded below.
+      if (NumMove == 8) // FIXME: Tune this limit.
+        break;
+
+      // Found a mergeable instruction; save information about it.
+      ++NumMove;
       LastOffset = Offset;
       LastBytes = Bytes;
       LastOpcode = LSMOpcode;
-      if (++NumMove == 8) // FIXME: Tune this limit.
-        break;
+
+      unsigned Loc = MI2LocMap[Op];
+      if (Loc <= FirstLoc) {
+        FirstLoc = Loc;
+        FirstOp = Op;
+      }
+      if (Loc >= LastLoc) {
+        LastLoc = Loc;
+        LastOp = Op;
+      }
     }
 
     if (NumMove <= 1)
Index: test/CodeGen/ARM/prera-ldst-insertpt.mir
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/prera-ldst-insertpt.mir
@@ -0,0 +1,110 @@
+# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s
+--- |
+  ; ModuleID = ''
+  source_filename = "-"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv7---eabi"
+
+  ; Function Attrs: norecurse nounwind optsize
+  define void @a(i32* nocapture %x, i32 %y, i32 %z) local_unnamed_addr #0 {
+  entry:
+    store i32 %y, i32* %x, align 4, !tbaa !3
+    %add = shl nsw i32 %z, 1
+    %arrayidx1 = getelementptr inbounds i32, i32* %x, i32 1
+    store i32 %add, i32* %arrayidx1, align 4, !tbaa !3
+    %mul = mul nsw i32 %z, %z
+    %div = udiv i32 %mul, 5
+    %add2 = add nuw nsw i32 %div, 55
+    %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 4
+    store i32 %add2, i32* %arrayidx3, align 4, !tbaa !3
+    %mul4 = mul nsw i32 %y, %y
+    %div5 = udiv i32 %mul4, 5
+    %add6 = add nuw nsw i32 %div5, 55
+    %arrayidx7 = getelementptr inbounds i32, i32* %x, i32 5
+    store i32 %add6, i32* %arrayidx7, align 4, !tbaa !3
+    ret void
+  }
+
+  attributes #0 = { norecurse nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+strict-align,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 5.0.0 (/local2/mnt/workspace2/efriedma/mainline/llvm/tools/clang 67484711e3ad73065cf83f6229dca6e61b3d0704) (llvm/ 3c6ba24628333e85628429ad0e2e809c4c0ab265)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            a
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: rgpr }
+  - { id: 2, class: rgpr }
+  - { id: 3, class: rgpr }
+  - { id: 4, class: rgpr }
+  - { id: 5, class: rgpr }
+  - { id: 6, class: rgpr }
+  - { id: 7, class: rgpr }
+  - { id: 8, class: rgpr }
+  - { id: 9, class: rgpr }
+  - { id: 10, class: rgpr }
+  - { id: 11, class: rgpr }
+  - { id: 12, class: gprnopc }
+  - { id: 13, class: gprnopc }
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+  - { reg: '%r1', virtual-reg: '%1' }
+  - { reg: '%r2', virtual-reg: '%2' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %r0, %r1, %r2
+
+    %2 = COPY %r2
+    %1 = COPY %r1
+    %0 = COPY %r0
+    %3 = t2MUL %2, %2, 14, _
+    %4 = t2MUL %1, %1, 14, _
+    %5 = t2MOVi32imm -858993459
+    %6, %7 = t2UMULL killed %3, %5, 14, _
+    %8, %9 = t2UMULL killed %4, %5, 14, _
+    t2STRi12 %1, %0, 0, 14, _ :: (store 4 into %ir.x, !tbaa !3)
+    %10 = t2LSLri %2, 1, 14, _, _
+    t2STRi12 killed %10, %0, 4, 14, _ :: (store 4 into %ir.arrayidx1, !tbaa !3)
+    %11 = t2MOVi 55, 14, _, _
+    %12 = t2ADDrs %11, killed %7, 19, 14, _, _
+    t2STRi12 killed %12, %0, 16, 14, _ :: (store 4 into %ir.arrayidx3, !tbaa !3)
+    %13 = t2ADDrs %11, killed %9, 19, 14, _, _
+    t2STRi12 killed %13, %0, 20, 14, _ :: (store 4 into %ir.arrayidx7, !tbaa !3)
+    tBX_RET 14, _
+
+...
+# Make sure we move the paired stores next to each other.
+# CHECK: t2STRi12 %1,
+# CHECK-NEXT: t2STRi12 killed %10,
+# CHECK: t2STRi12 killed %12,
+# CHECK-NEXT: t2STRi12 killed %13,