Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -886,10 +886,13 @@ return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; case ARMII::AddrModeT2_i7s4: return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; + case ARMII::AddrMode2: case ARMII::AddrModeT2_i8: return std::abs(Imm) < (((1 << 8) * 1) - 1); case ARMII::AddrModeT2_i12: return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); + case ARMII::AddrModeT2_i8s4: + return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0; default: llvm_unreachable("Unhandled Addressing mode"); } Index: llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1497,12 +1497,16 @@ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); - if (Offset == Bytes) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); - } else if (!isAM5 && Offset == -Bytes) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); - } else + if (MergeInstr == MBB.end()) return false; + + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + if ((isAM5 && Offset != Bytes) || + (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII)) + return false; + } } LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); MBB.erase(MergeInstr); @@ -1541,7 +1545,7 @@ (void)MIB; LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } else { - int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift); auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) @@ -1571,7 +1575,7 @@ // the vestigal zero-reg offset register. When that's fixed, this clause // can be removed entirely. if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift); // STR_PRE, STR_POST auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) @@ -1624,13 +1628,14 @@ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); unsigned NewOpc; - if (Offset == 8 || Offset == -8) { + if (Offset != 0) { NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); - if (Offset == 8 || Offset == -8) { - NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; - } else + if (MergeInstr == MBB.end()) + return false; + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; + if (!isLegalAddressImm(NewOpc, Offset, TII)) return false; } LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); Index: llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1190,11 +1190,10 @@ ; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then ; THUMB-ENABLE-NEXT: push {r7, lr} ; THUMB-ENABLE-NEXT: mov r7, sp -; THUMB-ENABLE-NEXT: sub sp, #12 +; THUMB-ENABLE-NEXT: strd r1, r1, [sp, #-12]! ; THUMB-ENABLE-NEXT: mov r0, r1 ; THUMB-ENABLE-NEXT: mov r2, r1 ; THUMB-ENABLE-NEXT: mov r3, r1 -; THUMB-ENABLE-NEXT: strd r1, r1, [sp] ; THUMB-ENABLE-NEXT: str r1, [sp, #8] ; THUMB-ENABLE-NEXT: bl _someVariadicFunc ; THUMB-ENABLE-NEXT: lsls r0, r0, #3 Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1716,7 +1716,7 @@ ; CHECK-NEXT: vmov r3, s10 ; CHECK-NEXT: vldrw.u32 q3, [r11, #48] ; CHECK-NEXT: vfma.f32 q1, q0, r3 -; CHECK-NEXT: ldr r3, [r1] +; CHECK-NEXT: ldr r3, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q7, r6 ; CHECK-NEXT: vldrw.u32 q6, [r11, #64] ; CHECK-NEXT: vfma.f32 q1, q3, r3 @@ -1726,7 +1726,6 @@ ; CHECK-NEXT: vfma.f32 q1, q5, r0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vfma.f32 q1, q4, r7 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vfma.f32 q1, q0, r9 ; CHECK-NEXT: vmov.f32 s2, s8 ; CHECK-NEXT: vstrb.8 q1, [r5], #16 Index: llvm/test/CodeGen/Thumb2/mve-vld3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -254,13 +254,13 @@ ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: ldr r2, [r0, #8] ; CHECK-NEXT: mov r3, sp -; CHECK-NEXT: str r2, [sp] ; CHECK-NEXT: vmov.f64 d2, d0 ; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: vmov.f32 s8, s1 -; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: str r2, [sp], #8 ; CHECK-NEXT: vldrh.u32 q1, [r3] +; CHECK-NEXT: vmov.f32 s8, s1 +; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vmov.f32 s10, s4 ; CHECK-NEXT: vmov.f32 s14, s5 ; CHECK-NEXT: vmov r2, s10 @@ -274,7 +274,6 @@ ; CHECK-NEXT: vmov r2, s12 ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: strh r0, [r1] -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr entry: %l1 = load <6 x i16>, <6 x i16>* %src, align 4 Index: llvm/test/CodeGen/Thumb2/mve-vst3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -704,12 +704,11 @@ ; CHECK-NEXT: vmov.16 q0[7], r6 ; CHECK-NEXT: vstrb.16 q0, [r2] ; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: ldr r2, [sp], #16 ; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: ldr r2, [sp] ; CHECK-NEXT: str r2, [r1] ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: strh r0, [r1, #4] -; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %s1 = getelementptr <2 x i8>, <2 x i8>* %src, i32 0 Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.generated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.generated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.generated.expected @@ -100,8 +100,8 @@ ; CHECK-NEXT: sub sp, sp, #20 ; CHECK-NEXT: ldr r0, .LCPI1_0 ; CHECK-NEXT: mov r1, #1 -; CHECK-NEXT: mov r2, #3 ; CHECK-NEXT: mov r3, #4 +; CHECK-NEXT: mov r2, #3 ; CHECK-NEXT: str r1, [sp, #12] ; CHECK-NEXT: str r1, [r0] ; CHECK-NEXT: mov r0, #0 @@ -113,11 +113,10 @@ ; CHECK-NEXT: @APP ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: str r0, [sp, #8] -; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: str r1, [sp, #12] ; CHECK-NEXT: str r2, [sp, #4] -; CHECK-NEXT: str r3, [sp] -; CHECK-NEXT: add sp, sp, #20 +; CHECK-NEXT: str r3, [sp], #20 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.nogenerated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.nogenerated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/arm_generated_funcs.ll.nogenerated.expected @@ -77,8 +77,8 @@ ; CHECK-NEXT: sub sp, sp, #20 ; CHECK-NEXT: ldr r0, .LCPI1_0 ; CHECK-NEXT: mov r1, #1 -; CHECK-NEXT: mov r2, #3 ; CHECK-NEXT: mov r3, #4 +; CHECK-NEXT: mov r2, #3 ; CHECK-NEXT: str r1, [sp, #12] ; CHECK-NEXT: str r1, [r0] ; CHECK-NEXT: mov r0, #0 @@ -90,11 +90,10 @@ ; CHECK-NEXT: @APP ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: str r0, [sp, #8] -; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: str r1, [sp, #12] ; CHECK-NEXT: str r2, [sp, #4] -; CHECK-NEXT: str r3, [sp] -; CHECK-NEXT: add sp, sp, #20 +; CHECK-NEXT: str r3, [sp], #20 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: