Thumb1: restore LR with "mov lr, rX" in the epilogue fix-up and let
ARMLoadStoreOpt::CombineMovBx fold "mov lr, rX; bx lr" into "bx rX".

Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -151,6 +151,7 @@
     bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+    bool CombineMovBx(MachineBasicBlock &MBB);
   };
   char ARMLoadStoreOpt::ID = 0;
 }
@@ -1825,6 +1826,39 @@
   return false;
 }
 
+/// Fold a tMOVr that defines LR and sits directly before the block's tBX_RET
+/// into a single tBX through the register that the move kills, i.e. turn
+///   mov lr, rX
+///   bx lr
+/// into
+///   bx rX
+/// Returns true if the two instructions were replaced.
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+      MBBI->getOpcode() != ARM::tBX_RET)
+    return false;
+
+  MachineBasicBlock::iterator Prev = MBBI;
+  --Prev;
+  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+    return false;
+
+  // isKill() is only valid on register operands, so filter the others out
+  // before asking for the kill flag.
+  for (const auto &Use : Prev->uses())
+    if (Use.isReg() && Use.isKill()) {
+      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+                         .addReg(Use.getReg(), RegState::Kill))
+          .copyImplicitOps(&*MBBI);
+      MBB.erase(MBBI);
+      MBB.erase(Prev);
+      return true;
+    }
+
+  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   MF = &Fn;
   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1844,6 +1878,8 @@
     Modified |= LoadStoreMultipleOpti(MBB);
     if (STI->hasV5TOps())
       Modified |= MergeReturnIntoLDM(MBB);
+    if (isThumb1)
+      Modified |= CombineMovBx(MBB);
   }
 
   Allocator.DestroyAll();
Index: lib/Target/ARM/Thumb1FrameLowering.cpp
===================================================================
--- lib/Target/ARM/Thumb1FrameLowering.cpp
+++ lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -433,14 +433,16 @@
   auto MBBI = MBB.getFirstTerminator();
   bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
   if (CanRestoreDirectly) {
-    if (MBBI != MBB.end())
+    if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
       CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
                             MBBI->getOpcode() == ARM::tPOP_RET);
     else {
-      assert(MBB.back().getOpcode() == ARM::tPOP);
+      auto MBBI_prev = MBBI;
+      MBBI_prev--;
+      assert(MBBI_prev->getOpcode() == ARM::tPOP);
       assert(MBB.succ_size() == 1);
       if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
-        MBBI--; // Replace the final tPOP with a tPOP_RET.
+        MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
       else
         CanRestoreDirectly = false;
     }
@@ -529,32 +531,31 @@
         .addReg(PopReg, RegState::Kill));
   }
 
-  bool AddBx = false;
-  if (MBBI == MBB.end()) {
-    MachineInstr& Pop = MBB.back();
-    assert(Pop.getOpcode() == ARM::tPOP);
-    Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR));
+  if (MBBI == MBB.end() || MBBI->getOpcode() == ARM::tB) {
+    auto Pop = MBBI;
+    Pop--;
+    assert(Pop->getOpcode() == ARM::tPOP);
+    Pop->RemoveOperand(Pop->findRegisterDefOperandIdx(ARM::LR));
   } else if (MBBI->getOpcode() == ARM::tPOP_RET) {
     // We couldn't use the direct restoration above, so
     // perform the opposite conversion: tPOP_RET to tPOP.
     MachineInstrBuilder MIB =
         AddDefaultPred(
             BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
-    unsigned Popped = 0;
+    bool Popped = false;
     for (auto MO: MBBI->operands())
       if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
           MO.getReg() != ARM::PC) {
         MIB.addOperand(MO);
         if (!MO.isImplicit())
-          Popped++;
+          Popped = true;
       }
     // Is there anything left to pop?
     if (!Popped)
       MBB.erase(MIB.getInstr());
     // Erase the old instruction.
     MBB.erase(MBBI);
-    MBBI = MBB.end();
-    AddBx = true;
+    MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
   }
 
   assert(PopReg && "Do not know how to get LR");
@@ -563,31 +564,14 @@
 
   emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
 
-  if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) {
-    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
-                                  .addReg(PopReg, RegState::Kill);
-    AddDefaultPred(MIB);
-    MIB.copyImplicitOps(&*MBBI);
-    // erase the old tBX_RET instruction
-    MBB.erase(MBBI);
-    return true;
-  }
+  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+                     .addReg(ARM::LR, RegState::Define)
+                     .addReg(PopReg, RegState::Kill));
 
-  if (AddBx && !TemporaryReg) {
-    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
-                       .addReg(PopReg, RegState::Kill));
-  } else {
-    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
-                       .addReg(ARM::LR, RegState::Define)
-                       .addReg(PopReg, RegState::Kill));
-  }
-  if (TemporaryReg) {
+  if (TemporaryReg)
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
                        .addReg(PopReg, RegState::Define)
                        .addReg(TemporaryReg, RegState::Kill));
-    if (AddBx)
-      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET)));
-  }
 
   return true;
 }
Index: test/CodeGen/Thumb/thumb-pop.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/thumb-pop.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv5e-none-linux-gnueabi-eabi -verify-machineinstrs -o - | FileCheck %s
+
+define i32 @test(i32 %value) {
+; CHECK-LABEL: test:
+; CHECK-NOT: pop {[[POP_REG:r[0-7]]], lr}
+entry:
+  %cmp = icmp slt i32 %value, 50
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %div = sdiv i32 5000, %value
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %mul = shl nsw i32 %value, 1
+  %sub = sub nsw i32 200, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %value.addr.0 = phi i32 [ %div, %if.then ], [ %sub, %if.else ]
+  ret i32 %value.addr.0
+}
+
Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll
===================================================================
--- test/CodeGen/Thumb/thumb-shrink-wrapping.ll
+++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -159,6 +159,7 @@
 ; DISABLE-V4T-NEXT: pop {r1}
 ; DISABLE-V4T-NEXT: bx r1
 ;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
 ; ENABLE-NEXT: bx lr
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
 entry:
@@ -270,7 +271,10 @@
 ; Next BB.
 ; SUM << 3.
 ; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE-NEXT: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
 ;
 ; Duplicated epilogue.
 ; DISABLE-V5T: pop {r4, pc}
@@ -285,6 +289,7 @@
 ; DISABLE-V4T-NEXT: pop {r1}
 ; DISABLE-V4T-NEXT: bx r1
 ;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
 ; ENABLE-NEXT: bx lr
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
 entry:
@@ -350,7 +355,10 @@
 ; Next BB.
 ; SUM << 3.
 ; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
 ;
 ; Duplicated epilogue.
 ; DISABLE-V5T: pop {r4, pc}
@@ -365,6 +373,7 @@
 ; DISABLE-V4T-NEXT: pop {r1}
 ; DISABLE-V4T-NEXT: bx r1
 ;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
 ; ENABLE-NEXT: bx lr
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
 entry:
@@ -431,7 +440,10 @@
 ;
 ; Next BB.
 ; CHECK: movs r0, #0
-; ENABLE-NEXT: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
 ;
 ; Duplicated epilogue.
 ; DISABLE-V5T-NEXT: pop {r4, pc}
@@ -446,6 +458,7 @@
 ; DISABLE-V4T-NEXT: pop {r1}
 ; DISABLE-V4T-NEXT: bx r1
 ;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
 ; ENABLE-NEXT: bx lr
 define i32 @inlineAsm(i32 %cond, i32 %N) {
 entry:
@@ -506,7 +519,10 @@
 ; CHECK-NEXT: lsls r0, r0, #3
 ;
 ; ENABLE-NEXT: add sp, #16
-; ENABLE-NEXT: pop {[[TMP]], lr}
+; ENABLE-V5T-NEXT: pop {[[TMP]], pc}
+; ENABLE-V4T-NEXT: pop {[[TMP]]}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
 ;
 ; Duplicated epilogue.
 ; DISABLE-V5T-NEXT: add sp, #16
@@ -518,6 +534,7 @@
 ; CHECK: lsls r0, r1, #1
 ;
 ; Epilogue code.
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
 ; ENABLE-NEXT: bx lr
 ;
 ; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end