Summary of this patch:
  * lib/Target/ARM/ARMLoadStoreOptimizer.cpp: in the rescheduling loop, record
    an instruction's location (FirstLoc/LastLoc, FirstOp/LastOp) and count it
    in NumMove only after it has passed the same-opcode and contiguous-offset
    checks, so an instruction that makes the loop break no longer affects them.
  * test/CodeGen/ARM/ldm-stm-i256.ll: new test; its output is piped through
    FileCheck so that the CHECK-DAG lines are actually evaluated.
  * test/CodeGen/ARM/prera-ldst-insertpt.mir: CHECK lines moved next to the
    stores they describe, and the live-range FIXME is dropped.

Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2161,33 +2161,40 @@
     unsigned LastBytes = 0;
     unsigned NumMove = 0;
     for (int i = Ops.size() - 1; i >= 0; --i) {
+      // Make sure each operation has the same kind.
       MachineInstr *Op = Ops[i];
-      unsigned Loc = MI2LocMap[Op];
-      if (Loc <= FirstLoc) {
-        FirstLoc = Loc;
-        FirstOp = Op;
-      }
-      if (Loc >= LastLoc) {
-        LastLoc = Loc;
-        LastOp = Op;
-      }
-
       unsigned LSMOpcode
         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
       if (LastOpcode && LSMOpcode != LastOpcode)
         break;
 
+      // Check that we have a contiguous set of offsets.
       int Offset = getMemoryOpOffset(*Op);
       unsigned Bytes = getLSMultipleTransferSize(Op);
       if (LastBytes) {
         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
           break;
       }
+
+      // Don't try to reschedule too many instructions.
+      if (NumMove == 8) // FIXME: Tune this limit.
+        break;
+
+      // Found a mergeable instruction; save information about it.
+      ++NumMove;
       LastOffset = Offset;
       LastBytes = Bytes;
       LastOpcode = LSMOpcode;
-      if (++NumMove == 8) // FIXME: Tune this limit.
-        break;
+
+      unsigned Loc = MI2LocMap[Op];
+      if (Loc <= FirstLoc) {
+        FirstLoc = Loc;
+        FirstOp = Op;
+      }
+      if (Loc >= LastLoc) {
+        LastLoc = Loc;
+        LastOp = Op;
+      }
     }
 
     if (NumMove <= 1)
Index: test/CodeGen/ARM/ldm-stm-i256.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/ldm-stm-i256.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=armv7--eabi -verify-machineinstrs < %s | FileCheck %s
+
+; Check the way we schedule/merge a bunch of loads and stores.
+; Originally test/CodeGen/ARM/2011-07-07-ScheduleDAGCrash.ll ; now
+; being used as a test of optimizations related to ldm/stm.
+
+; FIXME: We could merge more loads/stores with regalloc hints.
+; FIXME: Fix scheduling so we don't have 16 live registers.
+
+define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
+entry:
+  %c = load i256, i256* %cc
+  %d = load i256, i256* %dd
+  %add = add nsw i256 %c, %d
+  store i256 %add, i256* %a, align 8
+  %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
+  %add6 = add nsw i256 %or, %d
+  store i256 %add6, i256* %b, align 8
+  ret void
+  ; CHECK-DAG: ldm r3
+  ; CHECK-DAG: ldm r2
+  ; CHECK-DAG: ldr {{.*}}, [r3, #20]
+  ; CHECK-DAG: ldr {{.*}}, [r3, #16]
+  ; CHECK-DAG: ldr {{.*}}, [r3, #28]
+  ; CHECK-DAG: ldr {{.*}}, [r3, #24]
+  ; CHECK-DAG: ldr {{.*}}, [r2, #20]
+  ; CHECK-DAG: ldr {{.*}}, [r2, #16]
+  ; CHECK-DAG: ldr {{.*}}, [r2, #28]
+  ; CHECK-DAG: ldr {{.*}}, [r2, #24]
+  ; CHECK-DAG: stmib r0
+  ; CHECK-DAG: str {{.*}}, [r0]
+  ; CHECK-DAG: str {{.*}}, [r0, #24]
+  ; CHECK-DAG: str {{.*}}, [r0, #28]
+  ; CHECK-DAG: str {{.*}}, [r1]
+  ; CHECK-DAG: stmib r1
+  ; CHECK-DAG: str {{.*}}, [r1, #24]
+  ; CHECK-DAG: str {{.*}}, [r1, #28]
+}
Index: test/CodeGen/ARM/prera-ldst-insertpt.mir
===================================================================
--- test/CodeGen/ARM/prera-ldst-insertpt.mir
+++ test/CodeGen/ARM/prera-ldst-insertpt.mir
@@ -36,6 +36,14 @@
     t2STRi12 %1, %0, 0, 14, _ :: (store 4)
     %10 : rgpr = t2LSLri %2, 1, 14, _, _
     t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
+
+    ; Make sure we move the paired stores next to each other, and
+    ; insert them in an appropriate location.
+    ; CHECK: t2STRi12 %1,
+    ; CHECK-NEXT: t2STRi12 killed %10,
+    ; CHECK-NEXT: t2MOVi
+    ; CHECK-NEXT: t2ADDrs
+
     %11 : rgpr = t2MOVi 55, 14, _, _
     %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _
     t2STRi12 killed %12, %0, 16, 14, _ :: (store 4)
@@ -43,12 +51,7 @@
     t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
 
     ; Make sure we move the paired stores next to each other.
-    ; FIXME: Make sure we don't extend the live-range of a store
-    ; when we don't need to.
-    ; CHECK: t2STRi12 %1,
-    ; CHECK-NEXT: t2STRi12 killed %10,
-    ; CHECK-NEXT: %13 = t2ADDrs %11
-    ; CHECK-NEXT: t2STRi12 killed %12,
+    ; CHECK: t2STRi12 killed %12,
     ; CHECK-NEXT: t2STRi12 killed %13,
 
     tBX_RET 14, _
@@ -73,6 +76,15 @@
     t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
     %3 : rgpr = t2MUL %2, %2, 14, _
     t2STRi12 %3, %0, 8, 14, _ :: (store 4)
+
+    ; Make sure we move the paired stores next to each other, and
+    ; insert them in an appropriate location.
+    ; CHECK: t2STRi12 {{.*}}, 0
+    ; CHECK-NEXT: t2STRi12 {{.*}}, 4
+    ; CHECK-NEXT: t2STRi12 {{.*}}, 8
+    ; CHECK-NEXT: t2MUL
+    ; CHECK-NEXT: t2MOVi32imm
+
     %4 : rgpr = t2MUL %1, %1, 14, _
     %5 : rgpr = t2MOVi32imm -858993459
     %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _
@@ -85,13 +97,7 @@
     t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
 
     ; Make sure we move the paired stores next to each other.
-    ; FIXME: Make sure we don't extend the live-range of a store
-    ; when we don't need to.
-    ; CHECK: t2STRi12 {{.*}}, 0
-    ; CHECK-NEXT: t2STRi12 {{.*}}, 4
-    ; CHECK-NEXT: t2STRi12 {{.*}}, 8
-    ; CHECK-NEXT: t2ADDrs
-    ; CHECK-NEXT: t2STRi12 {{.*}}, 16
+    ; CHECK: t2STRi12 {{.*}}, 16
     ; CHECK-NEXT: t2STRi12 {{.*}}, 20
 
     tBX_RET 14, _