Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -512,6 +512,26 @@ return false; } +static void findTemporariesForLR(const BitVector &GPRsNoLRSP, + const BitVector &PopFriendly, + const LivePhysRegs &UsedRegs, unsigned &PopReg, + unsigned &TmpReg) { + PopReg = TmpReg = 0; + for (auto Reg : GPRsNoLRSP.set_bits()) { + if (!UsedRegs.contains(Reg)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Reg)) { + PopReg = Reg; + TmpReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TmpReg = Reg; + } + } +} + bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const { MachineFunction &MF = *MBB.getParent(); @@ -600,17 +620,19 @@ GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); - for (unsigned Register : GPRsNoLRSP.set_bits()) { - if (!UsedRegs.contains(Register)) { - // Remember the first pop-friendly register and exit. - if (PopFriendly.test(Register)) { - PopReg = Register; - TemporaryReg = 0; - break; - } - // Otherwise, remember that the register will be available to - // save a pop-friendly register. - TemporaryReg = Register; + findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); + + // If we couldn't find a pop-friendly register, restore LR before popping the + // other callee-saved registers, so we can use one of them as a temporary. + bool UseLDRSP = false; + if (!PopReg && MBBI != MBB.begin()) { + auto PrevMBBI = MBBI; + PrevMBBI--; + if (PrevMBBI->getOpcode() == ARM::tPOP) { + MBBI = PrevMBBI; + UsedRegs.stepBackward(*MBBI); + findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); + UseLDRSP = true; } } @@ -619,6 +641,26 @@ assert((PopReg || TemporaryReg) && "Cannot get LR"); + if (UseLDRSP) { + assert(PopReg && "Do not know how to get LR"); + // Load the LR via LDR tmp, [SP, #off] + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) + .addReg(PopReg, RegState::Define) + .addReg(ARM::SP) + .addImm(MBBI->getNumOperands() - 3) + .add(predOps(ARMCC::AL)); + // Move from the temporary register to the LR. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); + // Advance past the pop instruction. + MBBI++; + // Increment the SP. + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4); + return true; + } + if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); @@ -911,33 +953,29 @@ if (Reg == ARM::LR) { Info.setRestored(false); - if (MBB.succ_empty()) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; - // Tailcall optimization failed; change TCRETURN to a tBL - if (MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) { - unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi - ? ARM::tBL : ARM::tBLXr; - MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode)); - BL.add(predOps(ARMCC::AL)); - BL.add(MI->getOperand(0)); - MBB.insert(MI, &*BL); - } - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); - } else + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) // LR may only be popped into PC, as part of return sequence. // If this isn't the return sequence, we'll need emitPopSpecialFixUp // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. continue; + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) + continue; + + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); NeedsPop = true; Index: test/CodeGen/ARM/thumb1_return_sequence.ll =================================================================== --- test/CodeGen/ARM/thumb1_return_sequence.ll +++ test/CodeGen/ARM/thumb1_return_sequence.ll @@ -25,23 +25,14 @@ ; -------- ; Stack realignment means sp is restored from frame pointer ; CHECK-V4T: mov sp +; CHECK-V4T-NEXT: ldr [[POP:r[4567]]], [sp, #{{.*}}] +; CHECK-V4T-NEXT: mov lr, [[POP]] ; CHECK-V4T-NEXT: pop {[[SAVED]]} +; CHECK-V4T-NEXT add sp, sp, #4 ; The ISA for v4 does not support pop pc, so make sure we do not emit ; one even when we do not need to update SP. ; CHECK-V4T-NOT: pop {pc} -; We may only use lo register to pop, but in that case, all the scratch -; ones are used. -; r12 is the only register we are allowed to clobber for AAPCS. -; Use it to save a lo register. -; CHECK-V4T-NEXT: mov [[TEMP_REG:r12]], [[POP_REG:r[0-7]]] -; Pop the value of LR. -; CHECK-V4T-NEXT: pop {[[POP_REG]]} -; Copy the value of LR in the right register. -; CHECK-V4T-NEXT: mov lr, [[POP_REG]] -; Restore the value that was in the register we used to pop the value of LR. -; CHECK-V4T-NEXT: mov [[POP_REG]], [[TEMP_REG]] -; Return. -; CHECK-V4T-NEXT: bx lr +; CHECK-V4T: bx lr ; CHECK-V5T: pop {[[SAVED]], pc} } @@ -66,22 +57,18 @@ ; Epilogue ; -------- -; CHECK-V4T: pop {[[SAVED]]} -; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]] -; CHECK-V4T-NEXT: pop {[[POP_REG]]} -; CHECK-V4T-NEXT: add sp, -; CHECK-V4T-NEXT: mov lr, [[POP_REG]] -; CHECK-V4T-NEXT: mov [[POP_REG]], r12 -; CHECK-V4T: bx lr +; CHECK-V4T: ldr [[POP:r[4567]]], [sp, #{{.*}}] +; CHECK-V4T-NEXT: mov lr, [[POP]] +; CHECK-V4T-NEXT: pop {[[SAVED]]} +; CHECK-V4T-NEXT: add sp, #16 +; CHECK-V4T-NEXT: bx lr ; CHECK-V5T: lsls r4 ; CHECK-V5T-NEXT: mov sp, r4 -; CHECK-V5T: pop {[[SAVED]]} -; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]] -; CHECK-V5T-NEXT: pop {[[POP_REG]]} -; CHECK-V5T-NEXT: add sp, -; CHECK-V5T-NEXT: mov lr, [[POP_REG]] -; CHECK-V5T-NEXT: mov [[POP_REG]], r12 -; CHECK-V5T-NEXT: bx lr +; CHECK-V5T: ldr [[POP:r[4567]]], [sp, #{{.*}}] +; CHECK-V5T-NEXT: mov lr, [[POP]] +; CHECK-V5T-NEXT: pop {[[SAVED]]} +; CHECK-V5T-NEXT: add sp, #16 +; CHECK-V5T-NEXT: bx lr } ; CHECK-V4T-LABEL: simpleframe Index: test/CodeGen/ARM/v8m-tail-call.ll =================================================================== --- test/CodeGen/ARM/v8m-tail-call.ll +++ test/CodeGen/ARM/v8m-tail-call.ll @@ -1,23 +1,47 @@ ; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s -define void @test() { -; CHECK-LABEL: test: -entry: - %call = tail call i32 @foo() - %tail = tail call i32 @foo() - ret void -; CHECK: bl foo -; CHECK: bl foo -; CHECK-NOT: b foo +declare i32 @g(...) + +declare i32 @h0(i32, i32, i32, i32) +define hidden i32 @f0() { + %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() + %2 = tail call i32 @h0(i32 %1, i32 1, i32 2, i32 3) + ret i32 %2 +; CHECK-LABEL: f0 +; CHECK: ldr [[POP:r[4567]]], [sp +; CHECK-NEXT: mov lr, [[POP]] +; CHECK-NEXT: pop {{.*}}[[POP]] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: b h0 } -define void @test2() { -; CHECK-LABEL: test2: -entry: - %tail = tail call i32 @foo() - ret void -; CHECK: b foo -; CHECK-NOT: bl foo +declare i32 @h1(i32) +define hidden i32 @f1() { + %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() + %2 = tail call i32 @h1(i32 %1) + ret i32 %2 +; CHECK-LABEL: f1 +; CHECK: pop {r7} +; CHECK: pop {r1} +; CHECK: mov lr, r1 +; CHECK: b h1 } -declare i32 @foo() +declare i32 @h2(i32, i32, i32, i32, i32) +define hidden i32 @f2(i32, i32, i32, i32, i32) { + %6 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() + %7 = icmp eq i32 %6, 0 + br i1 %7, label %10, label %8 + + %9 = tail call i32 @h2(i32 %6, i32 %1, i32 %2, i32 %3, i32 %4) + br label %10 + + %11 = phi i32 [ %9, %8 ], [ -1, %5 ] + ret i32 %11 +; CHECK-LABEL: f2 +; CHECK: ldr [[POP:r[4567]]], [sp +; CHECK-NEXT: mov lr, [[POP]] +; CHECK-NEXT: pop {{.*}}[[POP]] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: b h2 +} Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll =================================================================== --- test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -647,11 +647,10 @@ ; ENABLE: push {r4, lr} ; CHECK: tst r3, r4 -; ENABLE-NEXT: pop {r4} -; ENABLE-NEXT: mov r12, r{{.*}} -; ENABLE-NEXT: pop {r0} -; ENABLE-NEXT: mov lr, r0 -; ENABLE-NEXT: mov r0, r12 +; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #8] +; ENABLE-NEXT: mov lr, [[POP]] +; ENABLE-NEXT: pop {[[POP]]} +; ENABLE-NEXT: add sp, #4 ; CHECK-NEXT: beq [[EXIT_LABEL]] ; CHECK: str r1, [r2]