Index: lib/Target/ARM/Thumb2SizeReduction.cpp
===================================================================
--- lib/Target/ARM/Thumb2SizeReduction.cpp
+++ lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -115,12 +115,14 @@
   { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
   { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
+  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },
 
   { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1,0 },
   { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1,0 },
@@ -422,6 +424,56 @@
     HasShift = true;
     OpNum = 4;
     break;
+  case ARM::t2LDR_POST:
+  case ARM::t2STR_POST: {
+    // Narrow a post-indexed word load / store (LDR/STR Rt, [Rn], #4) into a
+    // single-register LDMIA/STMIA with base writeback (LDM/STM Rn!, {Rt}).
+    // This is only done when optimizing for minimum size.
+    if (!MBB.getParent()->getFunction()->optForMinSize())
+      return false;
+
+    // LDM / STM require a word-aligned address, whereas the LDR / STR being
+    // replaced may legally access an unaligned one. Only narrow when the
+    // single memory operand proves at least 4-byte alignment.
+    if (!MI->hasOneMemOperand() ||
+        (*MI->memoperands_begin())->getAlignment() < 4)
+      return false;
+
+    // We're creating a completely different type of load/store - LDM from LDR.
+    // For this reason we can't reuse the logic at the end of this function; we
+    // have to implement the MI building here.
+    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
+    unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
+    unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
+    unsigned Offset = MI->getOperand(3).getImm();
+    unsigned PredImm = MI->getOperand(4).getImm();
+    unsigned PredReg = MI->getOperand(5).getReg();
+    assert(isARMLowRegister(Rt));
+    assert(isARMLowRegister(Rn));
+
+    // LDMIA / STMIA with one register always advances the base by 4.
+    if (Offset != 4)
+      return false;
+
+    // Add the 16-bit load / store instruction.
+    DebugLoc dl = MI->getDebugLoc();
+    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
+                   .addReg(Rn, RegState::Define)
+                   .addReg(Rn)
+                   .addImm(PredImm)
+                   .addReg(PredReg)
+                   .addReg(Rt, IsStore ? 0 : RegState::Define);
+
+    // Transfer memoperands.
+    MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+    // Transfer MI flags.
+    MIB.setMIFlags(MI->getFlags());
+
+    // Kill the old instruction.
+    MI->eraseFromParent();
+    return true;
+  }
   case ARM::t2LDMIA: {
     unsigned BaseReg = MI->getOperand(0).getReg();
     assert(isARMLowRegister(BaseReg));
Index: test/CodeGen/ARM/t2-shrink-ldrpost.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/t2-shrink-ldrpost.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7m--linux-gnu"
+
+; CHECK-LABEL: f:
+; CHECK: ldm r{{[0-9]}}!, {r[[x:[0-9]]]}
+; CHECK: add.w r[[x]], r[[x]], #3
+; CHECK: stm r{{[0-9]}}!, {r[[x]]}
+define void @f(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize minsize {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
+  %.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
+  %.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
+  %2 = getelementptr inbounds i32, i32* %.03, i32 1
+  %3 = load i32, i32* %.03, align 4
+  %4 = add nsw i32 %3, 3
+  %5 = getelementptr inbounds i32, i32* %.012, i32 1
+  store i32 %4, i32* %.012, align 4
+  %6 = add nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %6, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+; Unaligned accesses must keep using LDR/STR: LDM/STM fault on addresses that
+; are not word aligned.
+; CHECK-LABEL: g:
+; CHECK-NOT: ldm
+; CHECK-NOT: stm
+define void @g(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize minsize {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
+  %.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
+  %.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
+  %2 = getelementptr inbounds i32, i32* %.03, i32 1
+  %3 = load i32, i32* %.03, align 1
+  %4 = add nsw i32 %3, 3
+  %5 = getelementptr inbounds i32, i32* %.012, i32 1
+  store i32 %4, i32* %.012, align 1
+  %6 = add nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %6, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}