diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -373,6 +373,10 @@ const outliner::Candidate &C) const override; private: + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5580,11 +5580,33 @@ /// | Frame overhead in Bytes | 4 | 4 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. 
+/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ enum MachineOutlinerClass { MachineOutlinerTailCall, MachineOutlinerThunk, - MachineOutlinerNoLRSave + MachineOutlinerNoLRSave, + MachineOutlinerRegSave }; enum MachineOutlinerMBBFlags { @@ -5600,6 +5622,8 @@ const int FrameThunk; const int CallNoLRSave; const int FrameNoLRSave; + const int CallRegSave; + const int FrameRegSave; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 4 : 4), @@ -5607,9 +5631,33 @@ CallThunk(target.isThumb() ? 4 : 4), FrameThunk(target.isThumb() ? 0 : 0), CallNoLRSave(target.isThumb() ? 4 : 4), - FrameNoLRSave(target.isThumb() ? 4 : 4) {} + FrameNoLRSave(target.isThumb() ? 4 : 4), + CallRegSave(target.isThumb() ? 8 : 12), + FrameRegSave(target.isThumb() ? 2 : 4) {} }; +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : ARM::rGPRRegClass) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. 
+ return 0u; +} + outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; @@ -5709,6 +5757,15 @@ C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave); CandidatesWithoutStackFixups.push_back(C); } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR. + else if (findRegisterToSaveLRTo(C)) { + FrameID = MachineOutlinerRegSave; + NumBytesNoStackCalls += Costs.CallRegSave; + C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave); + CandidatesWithoutStackFixups.push_back(C); + } } if (!CandidatesWithoutStackFixups.empty()) { @@ -5961,6 +6018,20 @@ CallMIB.add(predOps(ARMCC::AL)); CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + if (!MBB.isLiveIn(ARM::LR)) + MBB.addLiveIn(ARM::LR); + copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); + CallPt = MBB.insert(It, CallMIB); + copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); + It--; + return CallPt; + } // Insert the call. 
It = MBB.insert(It, CallMIB); return It; diff --git a/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir @@ -0,0 +1,174 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @outline_save_reg_arm() #0 { ret void } + define void @outline_save_reg_thumb() #1 { ret void } + declare void @z() + + attributes #0 = { minsize optsize } + attributes #1 = { minsize optsize "target-features"="+armv7-a,+thumb-mode" } +... +--- + +name: outline_save_reg_arm +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_save_reg_arm + ; CHECK: bb.0: + ; CHECK: liveins: $lr + ; CHECK: $r6 = MOVr killed $lr, 14 /* CC::al */, $noreg, $noreg + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: $lr = MOVr killed $r6, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.1: + ; CHECK: liveins: $lr + ; CHECK: $r6 = MOVr killed $lr, 14 /* CC::al */, $noreg, $noreg + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: $lr = MOVr killed $r6, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.2: + ; CHECK: liveins: $lr + ; CHECK: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r5 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.3: + ; CHECK: liveins: $lr, $r0, $r6, $r7, $r8, $r9, $r10, $r11 + ; CHECK: $r6 = MOVr killed $lr, 14 /* CC::al */, $noreg, $noreg + ; CHECK: BL @OUTLINED_FUNCTION_1 + ; CHECK: $lr = MOVr killed $r6, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.4: + ; CHECK: liveins: $lr + ; CHECK: $r2 = MOVr $lr, 14 /* CC::al */, $noreg, $noreg + ; CHECK: 
BX_RET 14 /* CC::al */, $noreg + bb.0: + liveins: $lr + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + $r5 = MOVi 1, 14, $noreg, $noreg + bb.1: + liveins: $lr + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + $r5 = MOVi 1, 14, $noreg, $noreg + bb.2: + liveins: $lr + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + $r5 = MOVi 1, 14, $noreg, $noreg + bb.3: + liveins: $lr, $r0, $r6, $r7, $r8, $r9, $r10, $r11 + $r0 = MOVi 1, 14, $noreg, $noreg + $r1 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r3 = MOVi 1, 14, $noreg, $noreg + $r4 = MOVi 1, 14, $noreg, $noreg + $r5 = MOVi 1, 14, $noreg, $noreg + bb.4: + liveins: $lr + $r2 = MOVr $lr, 14, $noreg, $noreg + BX_RET 14, $noreg +... 
+--- + +name: outline_save_reg_thumb +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: outline_save_reg_thumb + ; CHECK: bb.0: + ; CHECK: liveins: $lr + ; CHECK: $r6 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 + ; CHECK: $lr = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: bb.1: + ; CHECK: liveins: $lr + ; CHECK: $r6 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 + ; CHECK: $lr = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: liveins: $lr + ; CHECK: $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r5 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: bb.3: + ; CHECK: liveins: $lr, $r0, $r6, $r7 + ; CHECK: $r6 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 + ; CHECK: $lr = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: liveins: $lr + ; CHECK: $r2 = tMOVr $lr, 14 /* CC::al */, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + bb.0: + liveins: $lr + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + $r3 = t2MOVi 1, 14, $noreg, $noreg + $r4 = t2MOVi 1, 14, $noreg, $noreg + $r5 = t2MOVi 1, 14, $noreg, $noreg + bb.1: + liveins: $lr + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + $r3 = t2MOVi 1, 14, $noreg, $noreg + $r4 = t2MOVi 1, 14, $noreg, $noreg + $r5 = t2MOVi 1, 14, $noreg, $noreg + bb.2: + liveins: $lr + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + $r3 = t2MOVi 1, 14, $noreg, $noreg + $r4 = t2MOVi 
1, 14, $noreg, $noreg + $r5 = t2MOVi 1, 14, $noreg, $noreg + bb.3: + liveins: $lr, $r0, $r6, $r7, $r8, $r9, $r10, $r11 + $r0 = t2MOVi 1, 14, $noreg, $noreg + $r1 = t2MOVi 1, 14, $noreg, $noreg + $r2 = t2MOVi 1, 14, $noreg, $noreg + $r3 = t2MOVi 1, 14, $noreg, $noreg + $r4 = t2MOVi 1, 14, $noreg, $noreg + $r5 = t2MOVi 1, 14, $noreg, $noreg + bb.4: + liveins: $lr + $r2 = tMOVr $lr, 14, $noreg + tBX_RET 14, $noreg + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_0 + ; CHECK: bb.0: + ; CHECK: liveins: $lr + ; CHECK: $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r5 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + + ; CHECK-LABEL: name: OUTLINED_FUNCTION_1 + ; CHECK: bb.0: + ; CHECK: liveins: $lr + ; CHECK: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r5 = MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MOVPCLR 14 /* CC::al */, $noreg +