diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -364,6 +364,16 @@ /// constructing an outlined call if one exists. Returns 0 otherwise. unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + /// Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It. + void saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + + /// Adds an instruction which restores the link register from the top of the + /// stack into the MachineBasicBlock \p MBB at position \p It. + void restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5544,12 +5544,32 @@ /// | Frame overhead in Bytes | 2 | 4 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. 
+/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | Yes | Yes | +/// +-------------------------+--------+-----+ enum MachineOutlinerClass { MachineOutlinerTailCall, MachineOutlinerThunk, MachineOutlinerNoLRSave, - MachineOutlinerRegSave + MachineOutlinerRegSave, + MachineOutlinerDefault }; enum MachineOutlinerMBBFlags { @@ -5567,6 +5587,8 @@ const int FrameNoLRSave; const int CallRegSave; const int FrameRegSave; + const int CallDefault; + const int FrameDefault; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 4 : 4), @@ -5576,7 +5598,9 @@ CallNoLRSave(target.isThumb() ? 4 : 4), FrameNoLRSave(target.isThumb() ? 4 : 4), CallRegSave(target.isThumb() ? 8 : 12), - FrameRegSave(target.isThumb() ? 2 : 4) {} + FrameRegSave(target.isThumb() ? 2 : 4), + CallDefault(target.isThumb() ? 8 : 12), + FrameDefault(target.isThumb() ? 2 : 4) {} }; unsigned @@ -5667,8 +5691,8 @@ }; OutlinerCosts *Costs = new OutlinerCosts(Subtarget); - unsigned FrameID = 0; - unsigned NumBytesToCreateFrame = 0; + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = Costs->FrameDefault; // If the last instruction in any candidate is a terminator, then we should // tail call all of the candidates. @@ -5684,7 +5708,8 @@ SetCandidateCallInfo(MachineOutlinerThunk, Costs->CallThunk); } else { // We need to decide how to emit calls + frames. We can always emit the same - // frame if we don't need to save to the stack. + // frame if we don't need to save to the stack. If we have to save to the + // stack, then we need a different frame. 
unsigned NumBytesNoStackCalls = 0; std::vector<outliner::Candidate> CandidatesWithoutStackFixups; @@ -5707,12 +5732,51 @@ C.setCallInfo(MachineOutlinerRegSave, Costs->CallRegSave); CandidatesWithoutStackFixups.push_back(C); } + + // Is SP used in the sequence at all? If not, we don't have to modify + // the stack, so we are guaranteed to get the same frame. + else if (C.UsedInSequence.available(ARM::SP)) { + NumBytesNoStackCalls += Costs->CallDefault; + C.setCallInfo(MachineOutlinerDefault, Costs->CallDefault); + CandidatesWithoutStackFixups.push_back(C); + } + + // If we outline this, we need to modify the stack. Pretend we don't + // outline this by saving all of its bytes. + else { + NumBytesNoStackCalls += SequenceSize; + } } - if (!CandidatesWithoutStackFixups.empty()) { + // If there are no places where we have to save LR, then note that we don't + // have to update the stack. Otherwise, give every candidate the default + // call type. + if (NumBytesNoStackCalls <= + RepeatedSequenceLocs.size() * Costs->CallDefault) { RepeatedSequenceLocs = CandidatesWithoutStackFixups; - } else - return outliner::OutlinedFunction(); + FrameID = MachineOutlinerNoLRSave; + } else { + SetCandidateCallInfo(MachineOutlinerDefault, Costs->CallDefault); + } + } + + // Does every candidate's MBB contain a call? If so, then we might have a + // call in the range. + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + // Check if the range contains a call. These require a save + restore of the + // link register. + if (std::any_of(FirstCand.front(), FirstCand.back(), + [](const MachineInstr &MI) { return MI.isCall(); })) + NumBytesToCreateFrame += Costs->FrameDefault; + + // Handle the last instruction separately. If this is a tail call, then the + // last instruction is a call. We don't want to save + restore in this case. + // However, it could be possible that the last instruction is a call without + // it being valid to tail call this sequence. We should consider this as + // well. 
+ else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) + NumBytesToCreateFrame += Costs->FrameThunk; } return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, @@ -5788,6 +5852,8 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { MachineInstr &MI = *MIT; + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); const TargetRegisterInfo *TRI = &getRegisterInfo(); // Be conservative with inline ASM @@ -5846,6 +5912,21 @@ } if (MI.isCall()) { + // Get the function associated with the call. Look at each operand and find + // the one that represents the callee and get its name. + const Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isGlobal()) { + Callee = dyn_cast<Function>(MOP.getGlobal()); + break; + } + } + + // Never outline calls to mcount. There isn't any rule that would require + // this, but the Linux kernel's "ftrace" feature depends on it. + if (Callee && Callee->getName() == "\01_mcount") + return outliner::InstrType::Illegal; + // If we don't know anything about the callee, assume it depends on the // stack layout of the caller. In that case, it's only legal to outline // as a tail-call. Whitelist the call instructions we know about so we @@ -5855,7 +5936,28 @@ Opc == ARM::tBLXr || Opc == ARM::tBLXi) UnknownCallOutlineType = outliner::InstrType::LegalTerminator; - return UnknownCallOutlineType; + if (!Callee) + return UnknownCallOutlineType; + + // We have a function we have information about. Check if it's something + // we can safely outline. + MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return UnknownCallOutlineType; + + // Check if we know anything about the callee saves on the function. 
If we + // don't, then don't touch it, since that implies that we haven't + // computed anything about its stack frame yet. + MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || + MFI.getNumObjects() > 0) + return UnknownCallOutlineType; + + // At this point, we can say that CalleeMF ought to not pass anything on the + // stack. Therefore, we can outline it. + return outliner::InstrType::Legal; } // Don't outline positions. @@ -5909,9 +6011,32 @@ return outliner::InstrType::Legal; } +void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; + int Align = -Subtarget.getStackAlignment().value(); + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::restoreLRFromStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); +} + void ARMBaseInstrInfo::buildOutlinedFrame( MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const { + unsigned StackAlignment = Subtarget.getStackAlignment().value(); // For thunk outlining, rewrite the last instruction from a call to a // tail-call. if (OF.FrameConstructionID == MachineOutlinerThunk) { @@ -5936,6 +6061,49 @@ .add(Call->getOperand(0)); Call->eraseFromParent(); } + // Is there a call in the outlined range? 
+ auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { + // LR has to be a live in so that we can save it. + MBB.addLiveIn(ARM::LR); + + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + Et = std::prev(MBB.end()); + + // Insert a save before the outlined region + saveLROnStack(MBB, It); + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const MCRegisterInfo *MRI = STI.getRegisterInfo(); + unsigned DwarfReg = MRI->getDwarfRegNum(ARM::LR, true); + + // Add a CFI saying the stack was moved down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now higher than + // before. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Insert a restore before the terminator for the function. + // Restore the link register. + restoreLRFromStack(MBB, Et); + } // If this is a tail call outlined function, then there's already a return. if (OF.FrameConstructionID == MachineOutlinerTailCall || @@ -5977,9 +6145,17 @@ CallMIB.add(predOps(ARMCC::AL)); CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + // Are we saving the link register? + if (C.CallConstructionID == MachineOutlinerNoLRSave || + C.CallConstructionID == MachineOutlinerThunk) { + // No, so just insert the call. + It = MBB.insert(It, CallMIB); + return It; + } + + MBB.addLiveIn(ARM::LR); // Can we save to a register? 
+ if (C.CallConstructionID == MachineOutlinerRegSave) { - MBB.addLiveIn(ARM::LR); unsigned Reg = findRegisterToSaveLRTo(C); assert(Reg != 0 && "No callee-saved register available?"); @@ -5991,6 +6167,10 @@ return CallPt; } - It = MBB.insert(It, CallMIB); - return It; + // We have the default case. Save and restore from SP. + saveLROnStack(MBB, It); + CallPt = MBB.insert(It, CallMIB); + restoreLRFromStack(MBB, It); + It--; + return CallPt; } diff --git a/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir @@ -7,6 +7,7 @@ define void @outline_no_save_1() #0 { ret void } define void @outline_no_save_2() #0 { ret void } define void @outline_save_reg() #0 { ret void } + define void @outline_save_noreg() #0 { ret void } define void @outline_cpsr_ok() #0 { ret void } define void @dont_outline_cpsr() #0 { ret void } define void @dont_outline_r12() #0 { ret void } @@ -63,6 +64,24 @@ ... --- +name: outline_save_noreg +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r8, $r9, $r10, $r11 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVr $lr, 14, $noreg, $noreg + bb.1: + liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r8, $r9, $r10, $r11 + BX_RET 14, $noreg +... +--- + name: outline_cpsr_ok tracksRegLiveness: true body: |