Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -406,11 +406,11 @@ if (AFI->getArgRegsSaveSize()) return true; - bool IsV4PopReturn = false; for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) - IsV4PopReturn = true; - return IsV4PopReturn && STI.hasV4TOps() && !STI.hasV5TOps(); + return true; + + return false; } bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, @@ -422,22 +422,40 @@ const ThumbRegisterInfo *RegInfo = static_cast(STI.getRegisterInfo()); - // If MBBI is a return instruction, we may be able to directly restore + // If MBBI is a return instruction, or is a tPOP followed by a return + // instruction in the successor BB, we may be able to directly restore // LR in the PC. // This is possible if we do not need to emit any SP update. // Otherwise, we need a temporary register to pop the value // and copy that value into LR. auto MBBI = MBB.getFirstTerminator(); - if (!ArgRegsSaveSize && MBBI != MBB.end() && - MBBI->getOpcode() == ARM::tBX_RET) { - if (!DoIt) + bool CanRestoreDirectly = !ArgRegsSaveSize; + if (CanRestoreDirectly) { + if (MBBI != MBB.end()) + CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET); + else { + assert(MBB.back().getOpcode() == ARM::tPOP); + assert(MBB.succ_size() == 1); + if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) + MBBI--; // replace the final tPOP with a tPOP_RET + else + CanRestoreDirectly = false; + } + } + + if (CanRestoreDirectly) { + if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) return true; MachineInstrBuilder MIB = AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))) - .addReg(ARM::PC, RegState::Define); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::LR) + MIB.addOperand(MO); + MIB.addReg(ARM::PC, RegState::Define); + // erase the old instruction (tBX_RET or tPOP) MBB.erase(MBBI); return true; } @@ -459,10 +477,10 @@ if (MBBI != MBB.end()) { dl = MBBI->getDebugLoc(); auto InstUpToMBBI = MBB.end(); - // The post-decrement is on purpose here. - // We want to have the liveness right before MBBI. - while (InstUpToMBBI-- != MBBI) - UsedRegs.stepBackward(*InstUpToMBBI); + while (InstUpToMBBI != MBBI) + // The pre-decrement is on purpose here. + // We want to have the liveness right before MBBI. + UsedRegs.stepBackward(*--InstUpToMBBI); } // Look for a register that can be directly use in the POP. @@ -509,8 +527,14 @@ } assert(PopReg && "Do not know how to get LR"); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(PopReg, RegState::Define); + MachineInstr *Pop; + if (MBBI == MBB.end()) { + Pop = &MBB.back(); + assert(Pop->getOpcode() == ARM::tPOP); + Pop->RemoveOperand(Pop->findRegisterDefOperandIdx(ARM::LR)); + } else + Pop = AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))); + Pop->addOperand(MachineOperand::CreateReg(PopReg, true)); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); Index: test/CodeGen/Thumb/pop-special-fixup.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb/pop-special-fixup.ll @@ -0,0 +1,59 @@ +; RUN: llc %s -enable-shrink-wrap=true -o - | FileCheck %s + +target triple = "thumbv6m-none-none-eabi" + +@retval = global i32 0, align 4 + +define i32 @test(i32 %i, i32 %argc, i8** nocapture readonly %argv) { + %1 = icmp sgt i32 %argc, %i + br i1 %1, label %2, label %19 + + %3 = getelementptr inbounds i8*, i8** %argv, i32 %i + %4 = load i8*, i8** %3, align 4 + %5 = load i8, i8* %4, align 1 + %6 = icmp eq i8 %5, 45 + %7 = getelementptr inbounds i8, i8* %4, i32 1 + %. = select i1 %6, i8* %7, i8* %4 + %.1 = select i1 %6, i32 -1, i32 1 + %8 = load i8, i8* %., align 1 + %.off2 = add i8 %8, -48 + %9 = icmp ult i8 %.off2, 10 + %.pre = load i32, i32* @retval, align 4 + br i1 %9, label %.lr.ph.preheader, label %.critedge + +.lr.ph.preheader: ; preds = %2 + br label %.lr.ph + +.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph + %10 = phi i32 [ %14, %.lr.ph ], [ %.pre, %.lr.ph.preheader ] + %11 = phi i8 [ %15, %.lr.ph ], [ %8, %.lr.ph.preheader ] + %valstring.03 = phi i8* [ %13, %.lr.ph ], [ %., %.lr.ph.preheader ] + %12 = zext i8 %11 to i32 + %13 = getelementptr inbounds i8, i8* %valstring.03, i32 1 + %14 = add nsw i32 %10, %12 + store i32 %14, i32* @retval, align 4 + %15 = load i8, i8* %13, align 1 + %.off = add i8 %15, -48 + %16 = icmp ult i8 %.off, 10 + br i1 %16, label %.lr.ph, label %.critedge.loopexit + +.critedge.loopexit: ; preds = %.lr.ph + %.lcssa = phi i32 [ %14, %.lr.ph ] + br label %.critedge + +.critedge: ; preds = %.critedge.loopexit, %2 + %17 = phi i32 [ %.pre, %2 ], [ %.lcssa, %.critedge.loopexit ] + %18 = mul nsw i32 %17, %.1 + store i32 %18, i32* @retval, align 4 + br label %19 + +;