diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -360,6 +360,10 @@ const outliner::Candidate &C) const override; private: + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5503,10 +5503,53 @@ /// | Frame overhead in Bytes | 0 | 0 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 4 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. 
+/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ enum MachineOutlinerClass { MachineOutlinerTailCall, - MachineOutlinerThunk + MachineOutlinerThunk, + MachineOutlinerNoLRSave, + MachineOutlinerRegSave }; enum MachineOutlinerMBBFlags { @@ -5520,14 +5563,45 @@ const int FrameTailCall; const int CallThunk; const int FrameThunk; + const int CallNoLRSave; + const int FrameNoLRSave; + const int CallRegSave; + const int FrameRegSave; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 4 : 4), FrameTailCall(target.isThumb() ? 0 : 0), CallThunk(target.isThumb() ? 4 : 4), - FrameThunk(target.isThumb() ? 0 : 0) {} + FrameThunk(target.isThumb() ? 0 : 0), + CallNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 4 : 4), + CallRegSave(target.isThumb() ? 8 : 12), + FrameRegSave(target.isThumb() ? 2 : 4) {} }; +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : + (Subtarget.isThumb() ? ARM::tGPRRegClass : ARM::GPRRegClass)) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. 
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; @@ -5608,8 +5682,38 @@ FrameID = MachineOutlinerThunk; NumBytesToCreateFrame = Costs->FrameThunk; SetCandidateCallInfo(MachineOutlinerThunk, Costs->CallThunk); - } else - return outliner::OutlinedFunction(); + } else { + // We need to decide how to emit calls + frames. We can always emit the same + // frame if we don't need to save to the stack. + unsigned NumBytesNoStackCalls = 0; + std::vector<outliner::Candidate> CandidatesWithoutStackFixups; + + for (outliner::Candidate &C : RepeatedSequenceLocs) { + C.initLRU(TRI); + + // Is LR available? If so, we don't need a save. + if (C.LRU.available(ARM::LR)) { + FrameID = MachineOutlinerNoLRSave; + NumBytesNoStackCalls += Costs->CallNoLRSave; + C.setCallInfo(MachineOutlinerNoLRSave, Costs->CallNoLRSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR. + else if (findRegisterToSaveLRTo(C)) { + FrameID = MachineOutlinerRegSave; + NumBytesNoStackCalls += Costs->CallRegSave; + C.setCallInfo(MachineOutlinerRegSave, Costs->CallRegSave); + CandidatesWithoutStackFixups.push_back(C); + } + } + + if (!CandidatesWithoutStackFixups.empty()) { + RepeatedSequenceLocs = CandidatesWithoutStackFixups; + } else + return outliner::OutlinedFunction(); + } return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID); @@ -5764,8 +5868,43 @@ return outliner::InstrType::Illegal; // Does this use the stack? 
- if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) + if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) { + // True if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + // If this instruction is in a range where it *never* needs to be fixed + // up, then we can *always* outline it. This is true even if it's not + // possible to fix that instruction up. + // + // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 + // use SP. Suppose that I1 sits within a range that definitely doesn't + // need stack fixups, while I2 sits in a range that does. + // + // First, I1 can be outlined as long as we *never* fix up the stack in + // any sequence containing it. I1 is already a safe instruction in the + // original program, so as long as we don't modify it we're good to go. + // So this leaves us with showing that outlining I2 won't break our + // program. + // + // Suppose I1 and I2 belong to equivalent candidate sequences. When we + // look at I2, we need to see if it can be fixed up. Suppose I2, (and + // thus I1) cannot be fixed up. Then I2 will be assigned a unique + // integer label; thus, I2 cannot belong to any candidate sequence (a + // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up + // as well, so we're good. Thus, I1 is always safe to outline. 
+ if (!MightNeedStackFixUp) + return outliner::InstrType::Legal; + return outliner::InstrType::Illegal; + } return outliner::InstrType::Legal; } @@ -5797,6 +5936,16 @@ .add(Call->getOperand(0)); Call->eraseFromParent(); } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + + // It's not a tail call, so we have to insert the return ourselves. Get the + // correct opcode from current feature set. + BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) + .add(predOps(ARMCC::AL)); } MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( @@ -5828,7 +5977,20 @@ CallMIB.add(predOps(ARMCC::AL)); CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); - // Insert the call. + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + MBB.addLiveIn(ARM::LR); + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. 
+ copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); + CallPt = MBB.insert(It, CallMIB); + copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); + It--; + return CallPt; + } + It = MBB.insert(It, CallMIB); return It; } diff --git a/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-nosave-and-regs.mir @@ -0,0 +1,116 @@ +# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @outline_no_save_1() #0 { ret void } + define void @outline_no_save_2() #0 { ret void } + define void @outline_save_reg() #0 { ret void } + define void @outline_cpsr_ok() #0 { ret void } + define void @dont_outline_cpsr() #0 { ret void } + define void @dont_outline_r12() #0 { ret void } + + attributes #0 = { minsize optsize } +... +--- + +name: outline_no_save_1 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r1 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVr $r1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_no_save_2 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r3 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVr $r3, 14, $noreg, $noreg + BX_RET 14, $noreg +... 
+--- + +name: outline_save_reg +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $lr + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVr $lr, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_cpsr_ok +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r4 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + CMPri $r4, 42, 14, $noreg, implicit-def $cpsr + Bcc %bb.1, 13, $cpsr + $r2 = MOVr $r4, 14, $noreg, $noreg + bb.1: + BX_RET 14, $noreg +... +--- + +name: dont_outline_cpsr +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $r4 + CMPri $r4, 42, 14, $noreg, implicit-def $cpsr + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + Bcc %bb.1, 13, $cpsr + $r2 = MOVr $r4, 14, $noreg, $noreg + bb.1: + BX_RET 14, $noreg +... +--- + +name: dont_outline_r12 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $r12 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVr $r12, 14, $noreg, $noreg + BX_RET 14, $noreg