Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -826,6 +826,10 @@
     return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
   case ARMII::AddrModeT2_i7s4:
     return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+  case ARMII::AddrModeT2_i8:
+    return std::abs(Imm) < (((1 << 8) * 1) - 1);
+  case ARMII::AddrModeT2_i12:
+    return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
   default:
     llvm_unreachable("Unhandled Addressing mode");
   }
Index: llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1382,9 +1382,27 @@
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     return ARM::t2LDR_POST;
+  case ARM::t2LDRBi8:
+  case ARM::t2LDRBi12:
+    return ARM::t2LDRB_POST;
+  case ARM::t2LDRSBi8:
+  case ARM::t2LDRSBi12:
+    return ARM::t2LDRSB_POST;
+  case ARM::t2LDRHi8:
+  case ARM::t2LDRHi12:
+    return ARM::t2LDRH_POST;
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRSHi12:
+    return ARM::t2LDRSH_POST;
   case ARM::t2STRi8:
   case ARM::t2STRi12:
     return ARM::t2STR_POST;
+  case ARM::t2STRBi8:
+  case ARM::t2STRBi12:
+    return ARM::t2STRB_POST;
+  case ARM::t2STRHi8:
+  case ARM::t2STRHi12:
+    return ARM::t2STRH_POST;
 
   case ARM::MVE_VLDRBS16:
     return ARM::MVE_VLDRBS16_post;
@@ -2539,11 +2557,92 @@
   case ARM::MVE_VSTRBU8:
   case ARM::MVE_VSTRHU16:
   case ARM::MVE_VSTRWU32:
+  case ARM::t2LDRHi8:
+  case ARM::t2LDRHi12:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRSHi12:
+  case ARM::t2LDRBi8:
+  case ARM::t2LDRBi12:
+  case ARM::t2LDRSBi8:
+  case ARM::t2LDRSBi12:
+  case ARM::t2STRBi8:
+  case ARM::t2STRBi12:
+  case ARM::t2STRHi8:
+  case ARM::t2STRHi12:
     return 1;
   }
   return -1;
 }
 
+// Given a memory access Opcode, check that the given Imm would be a valid
+// Offset for this instruction (same as isLegalAddressImm), or whether the
+// instruction could easily be converted to one where that offset is valid.
+// For example, a t2LDRi12 can become a t2LDRi8 for negative offsets. Works
+// in conjunction with AdjustBaseAndOffset below.
+static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
+                                           const TargetInstrInfo *TII) {
+  if (isLegalAddressImm(Opcode, Imm, TII))
+    return true;
+
+  // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+  const MCInstrDesc &Desc = TII->get(Opcode);
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  switch (AddrMode) {
+  case ARMII::AddrModeT2_i12:
+    return std::abs(Imm) < (((1 << 8) * 1) - 1);
+  }
+  return false;
+}
+
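For reference, the immediate ranges behind the new isLegalAddressImm cases and the i12-to-i8 conversion check are small enough to sanity-check in isolation. The sketch below is illustration only (plain C++, no LLVM APIs; the fitsT2i12/fitsT2i8 names are invented here) and simply mirrors the comparisons added above: T2 i12 offsets must be non-negative, while T2 i8 offsets may be negative, which is what makes the i12-to-i8 conversion useful once a base increment has been folded in.

```cpp
// Illustration only: mirrors the immediate checks added above, outside LLVM.
#include <cstdlib>
#include <iostream>

// AddrModeT2_i12: non-negative offsets (the patch checks Imm < (1 << 12) - 1).
static bool fitsT2i12(int Imm) { return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); }
// AddrModeT2_i8: signed offsets (the patch checks abs(Imm) < (1 << 8) - 1).
static bool fitsT2i8(int Imm) { return std::abs(Imm) < (((1 << 8) * 1) - 1); }

int main() {
  // A t2LDRBi12 at [base, #2] whose base has a later "add #4" folded into it
  // needs offset 2 - 4 = -2: not encodable as i12, but fine as i8.
  int NewOffset = 2 - 4;
  std::cout << "i12: " << fitsT2i12(NewOffset)
            << ", i8: " << fitsT2i8(NewOffset) << "\n";
  return 0;
}
```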
+// Given an MI, adjust its address BaseReg to use NewBaseReg and its address
+// offset by -Offset. This can either happen in-place or be a replacement as
+// MI is converted to another instruction type.
+static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
+                                int Offset, const TargetInstrInfo *TII) {
+  unsigned BaseOp = getBaseOperandIndex(*MI);
+  MI->getOperand(BaseOp).setReg(NewBaseReg);
+  int OldOffset = MI->getOperand(BaseOp + 1).getImm();
+  if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
+    MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
+  else {
+    unsigned ConvOpcode;
+    switch (MI->getOpcode()) {
+    case ARM::t2LDRHi12:
+      ConvOpcode = ARM::t2LDRHi8;
+      break;
+    case ARM::t2LDRSHi12:
+      ConvOpcode = ARM::t2LDRSHi8;
+      break;
+    case ARM::t2LDRBi12:
+      ConvOpcode = ARM::t2LDRBi8;
+      break;
+    case ARM::t2LDRSBi12:
+      ConvOpcode = ARM::t2LDRSBi8;
+      break;
+    case ARM::t2STRHi12:
+      ConvOpcode = ARM::t2STRHi8;
+      break;
+    case ARM::t2STRBi12:
+      ConvOpcode = ARM::t2STRBi8;
+      break;
+    default:
+      llvm_unreachable("Unhandled convertable opcode");
+    }
+    assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
+           "Illegal Address Immediate after convert!");
+
+    const MCInstrDesc &MCID = TII->get(ConvOpcode);
+    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+        .add(MI->getOperand(0))
+        .add(MI->getOperand(1))
+        .addImm(OldOffset - Offset)
+        .add(MI->getOperand(3))
+        .add(MI->getOperand(4))
+        .cloneMemRefs(*MI);
+    MI->eraseFromParent();
+  }
+}
+
 static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
                                             Register NewReg,
                                             const TargetInstrInfo *TII,
@@ -2562,14 +2661,43 @@
     TRC = TII->getRegClass(MCID, 2, TRI, *MF);
   MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
 
-  return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
-      .addReg(NewReg, RegState::Define)
-      .add(MI->getOperand(0))
-      .add(MI->getOperand(1))
-      .addImm(Offset)
-      .add(MI->getOperand(3))
-      .add(MI->getOperand(4))
-      .cloneMemRefs(*MI);
+  unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
+  switch (AddrMode) {
+  case ARMII::AddrModeT2_i7:
+  case ARMII::AddrModeT2_i7s2:
+  case ARMII::AddrModeT2_i7s4:
+    // Any MVE load/store
+    return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+        .addReg(NewReg, RegState::Define)
+        .add(MI->getOperand(0))
+        .add(MI->getOperand(1))
+        .addImm(Offset)
+        .add(MI->getOperand(3))
+        .add(MI->getOperand(4))
+        .cloneMemRefs(*MI);
+  case ARMII::AddrModeT2_i8:
+    if (MI->mayLoad()) {
+      return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+          .add(MI->getOperand(0))
+          .addReg(NewReg, RegState::Define)
+          .add(MI->getOperand(1))
+          .addImm(Offset)
+          .add(MI->getOperand(3))
+          .add(MI->getOperand(4))
+          .cloneMemRefs(*MI);
+    } else {
+      return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+          .addReg(NewReg, RegState::Define)
+          .add(MI->getOperand(0))
+          .add(MI->getOperand(1))
+          .addImm(Offset)
+          .add(MI->getOperand(3))
+          .add(MI->getOperand(4))
+          .cloneMemRefs(*MI);
+    }
+  default:
+    llvm_unreachable("Unhandled createPostIncLoadStore");
+  }
 }
 
 // Given a Base Register, optimise the load/store uses to attempt to create more
@@ -2648,7 +2776,7 @@
       if (DT->dominates(BaseAccess, Use)) {
         SuccessorAccesses.insert(Use);
         unsigned BaseOp = getBaseOperandIndex(*Use);
-        if (!isLegalAddressImm(
+        if (!isLegalOrConvertableAddressImm(
                 Use->getOpcode(),
                 Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
           LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on use\n");
@@ -2673,10 +2801,7 @@
 
   for (auto *Use : SuccessorAccesses) {
     LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
-    unsigned BaseOp = getBaseOperandIndex(*Use);
-    Use->getOperand(BaseOp).setReg(NewBaseReg);
-    int OldOffset = Use->getOperand(BaseOp + 1).getImm();
-    Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+    AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
     LLVM_DEBUG(dbgs() << "  To  : "; Use->dump());
   }
 
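The test updates that follow show the overall effect on generated code: one scalar access absorbs the base-register increment as a post-increment, and every later access to that base is rewritten with its offset reduced by the increment, dropping the separate adds. A rough standalone model of the offset rewrite, using the #16 increment and the #0..#14 halfword offsets from the float16 FIR loop below (the register names are just for illustration):

```cpp
// Illustration only: models the offset redistribution, not the pass itself.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Before: ldrh at [r6, #0], [r6, #2], ..., [r6, #14] followed by "adds r6, #16".
  std::vector<int> Offsets = {0, 2, 4, 6, 8, 10, 12, 14};
  int Increment = 16;

  // After: the first load takes the increment as a post-increment writeback...
  std::cout << "ldrh r0, [r6], #" << Increment << "\n";

  // ...and the remaining loads see the already-incremented base, so each
  // offset shrinks by the increment: #2 -> #-14, #4 -> #-12, ..., #14 -> #-2.
  for (std::size_t I = 1; I < Offsets.size(); ++I)
    std::cout << "ldrh r0, [r6, #" << Offsets[I] - Increment << "]\n";
  return 0;
}
```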
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -1809,16 +1809,15 @@
 ; CHECK-NEXT:    add.w r12, r12, #4
 ; CHECK-NEXT:    vmov s4, r4
 ; CHECK-NEXT:    vcvt.f16.s32 s4, s4
-; CHECK-NEXT:    ldrsh.w r4, [r3]
+; CHECK-NEXT:    ldrsh r4, [r3], #8
 ; CHECK-NEXT:    vmul.f16 s2, s2, s4
 ; CHECK-NEXT:    vldr.16 s4, [r2]
 ; CHECK-NEXT:    vmov s6, r4
 ; CHECK-NEXT:    vcvt.f16.s32 s6, s6
-; CHECK-NEXT:    ldrsh r5, [r3, #-2]
-; CHECK-NEXT:    ldrsh r4, [r3, #-4]
+; CHECK-NEXT:    ldrsh r5, [r3, #-10]
+; CHECK-NEXT:    ldrsh r4, [r3, #-12]
 ; CHECK-NEXT:    vmul.f16 s4, s4, s6
 ; CHECK-NEXT:    vldr.16 s6, [r2, #-2]
-; CHECK-NEXT:    adds r3, #8
 ; CHECK-NEXT:    vmov s8, r5
 ; CHECK-NEXT:    vcvt.f16.s32 s8, s8
 ; CHECK-NEXT:    vmov s10, r4
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -425,17 +425,15 @@
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #-8]
 ; CHECK-NEXT:    ldrb r8, [r5, #-2]
-; CHECK-NEXT:    ldrb r7, [r6]
+; CHECK-NEXT:    ldrb r7, [r6], #4
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #-4]
 ; CHECK-NEXT:    ldrb r8, [r5, #-1]
-; CHECK-NEXT:    ldrb r7, [r6, #1]
+; CHECK-NEXT:    ldrb r7, [r6, #-3]
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4]
-; CHECK-NEXT:    ldrb.w r8, [r5]
-; CHECK-NEXT:    adds r5, #4
-; CHECK-NEXT:    ldrb r7, [r6, #2]
-; CHECK-NEXT:    adds r6, #4
+; CHECK-NEXT:    ldrb r8, [r5], #4
+; CHECK-NEXT:    ldrb r7, [r6, #-2]
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #4]
 ; CHECK-NEXT:    adds r4, #16
@@ -718,17 +716,15 @@
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #-8]
 ; CHECK-NEXT:    ldrb r8, [r5, #-2]
-; CHECK-NEXT:    ldrb r7, [r6]
+; CHECK-NEXT:    ldrb r7, [r6], #4
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #-4]
 ; CHECK-NEXT:    ldrb r8, [r5, #-1]
-; CHECK-NEXT:    ldrb r7, [r6, #1]
+; CHECK-NEXT:    ldrb r7, [r6, #-3]
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4]
-; CHECK-NEXT:    ldrb.w r8, [r5]
-; CHECK-NEXT:    adds r5, #4
-; CHECK-NEXT:    ldrb r7, [r6, #2]
-; CHECK-NEXT:    adds r6, #4
+; CHECK-NEXT:    ldrb r8, [r5], #4
+; CHECK-NEXT:    ldrb r7, [r6, #-2]
 ; CHECK-NEXT:    smlabb r7, r7, r8, r2
 ; CHECK-NEXT:    str r7, [r4, #4]
 ; CHECK-NEXT:    adds r4, #16
Index: llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1177,31 +1177,30 @@
 ; CHECK-NEXT:  .LBB16_6: @ %for.body
 ; CHECK-NEXT:    @ Parent Loop BB16_4 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    ldrh r0, [r6]
+; CHECK-NEXT:    ldrh r0, [r6], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r7]
 ; CHECK-NEXT:    adds r3, r7, #2
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
 ; CHECK-NEXT:    vldrw.u32 q1, [r3]
-; CHECK-NEXT:    ldrh r0, [r6, #2]
+; CHECK-NEXT:    ldrh r0, [r6, #-14]
 ; CHECK-NEXT:    adds r3, r7, #6
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
-; CHECK-NEXT:    ldrh r0, [r6, #4]
+; CHECK-NEXT:    ldrh r0, [r6, #-12]
 ; CHECK-NEXT:    vldrw.u32 q1, [r7, #4]
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
 ; CHECK-NEXT:    vldrw.u32 q1, [r3]
-; CHECK-NEXT:    ldrh r0, [r6, #6]
+; CHECK-NEXT:    ldrh r0, [r6, #-10]
 ; CHECK-NEXT:    add.w r3, r7, #10
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
-; CHECK-NEXT:    ldrh r0, [r6, #8]
+; CHECK-NEXT:    ldrh r0, [r6, #-8]
 ; CHECK-NEXT:    vldrw.u32 q1, [r7, #8]
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
 ; CHECK-NEXT:    vldrw.u32 q1, [r3]
-; CHECK-NEXT:    ldrh r0, [r6, #10]
-; CHECK-NEXT:    ldrh r3, [r6, #14]
+; CHECK-NEXT:    ldrh r0, [r6, #-6]
+; CHECK-NEXT:    ldrh r3, [r6, #-2]
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
-; CHECK-NEXT:    ldrh r0, [r6, #12]
+; CHECK-NEXT:    ldrh r0, [r6, #-4]
 ; CHECK-NEXT:    vldrw.u32 q1, [r7, #12]
-; CHECK-NEXT:    adds r6, #16
 ; CHECK-NEXT:    vfma.f16 q0, q1, r0
 ; CHECK-NEXT:    add.w r0, r7, #14
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
Index: llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
+++ llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
@@ -106,14 +106,12 @@
 ; CHECK-NEXT:    wls lr, lr, .LBB1_7
 ; CHECK-NEXT:  .LBB1_5: @ %while.body11
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrsh.w r5, [r0, #2]
-; CHECK-NEXT:    ldrsh.w r6, [r1]
-; CHECK-NEXT:    ldrsh.w r9, [r0]
-; CHECK-NEXT:    adds r0, #4
-; CHECK-NEXT:    ldrsh.w r2, [r1, #2]
-; CHECK-NEXT:    adds r1, #4
-; CHECK-NEXT:    smlalbb r4, r11, r6, r5
+; CHECK-NEXT:    ldrsh r9, [r0], #4
+; CHECK-NEXT:    ldrsh r6, [r1], #4
+; CHECK-NEXT:    ldrsh r5, [r0, #-2]
+; CHECK-NEXT:    ldrsh r2, [r1, #-2]
 ; CHECK-NEXT:    smlalbb r12, r7, r6, r9
+; CHECK-NEXT:    smlalbb r4, r11, r6, r5
 ; CHECK-NEXT:    muls r5, r2, r5
 ; CHECK-NEXT:    smlalbb r4, r11, r2, r9
 ; CHECK-NEXT:    subs.w r12, r12, r5
Index: llvm/test/CodeGen/Thumb2/postinc-distribute.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/postinc-distribute.mir
+++ llvm/test/CodeGen/Thumb2/postinc-distribute.mir
@@ -57,9 +57,8 @@
     ; CHECK-LABEL: name: t2LDRHi12
     ; CHECK: liveins: $r0
    ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
-    ; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+    ; CHECK: $r0 = COPY [[t2LDRH_POST1]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -84,9 +83,8 @@
     ; CHECK-LABEL: name: t2LDRSHi12
     ; CHECK: liveins: $r0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
-    ; CHECK: [[t2LDRSHi12_:%[0-9]+]]:rgpr = t2LDRSHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+    ; CHECK: $r0 = COPY [[t2LDRSH_POST1]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -111,9 +109,8 @@
     ; CHECK-LABEL: name: t2LDRBi12
     ; CHECK: liveins: $r0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
-    ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+    ; CHECK: $r0 = COPY [[t2LDRB_POST1]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -138,9 +135,8 @@
     ; CHECK-LABEL: name: t2LDRSBi12
     ; CHECK: liveins: $r0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
-    ; CHECK: [[t2LDRSBi12_:%[0-9]+]]:rgpr = t2LDRSBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+    ; CHECK: $r0 = COPY [[t2LDRSB_POST1]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -197,9 +193,8 @@
     ; CHECK: liveins: $r0, $r1
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
-    ; CHECK: t2STRHi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
+    ; CHECK: $r0 = COPY %2
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = COPY $r1
@@ -227,9 +222,8 @@
     ; CHECK: liveins: $r0, $r1
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
-    ; CHECK: t2STRBi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
-    ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: $r0 = COPY [[t2ADDri]]
+    ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
+    ; CHECK: $r0 = COPY %2
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
     %1:rgpr = COPY $r1