Index: include/llvm/CodeGen/MachineOutliner.h =================================================================== --- include/llvm/CodeGen/MachineOutliner.h +++ include/llvm/CodeGen/MachineOutliner.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { @@ -74,6 +75,13 @@ /// cost model information. LiveRegUnits LRU; + /// Contains the accumulated register liveness information for the + /// instructions in this \p Candidate. + /// + /// This is optionally used by the target to determine which registers have + /// been used across the sequence. + LiveRegUnits UsedInSequence; + /// Return the number of instructions in this Candidate. unsigned getLength() const { return Len; } @@ -132,11 +140,16 @@ "Candidate's Machine Function must track liveness"); LRU.init(TRI); LRU.addLiveOuts(*MBB); - // Compute liveness from the end of the block up to the beginning of the // outlining candidate. std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(), [this](MachineInstr &MI) { LRU.stepBackward(MI); }); + + // Walk over the sequence itself and figure out which registers were used + // in the sequence. + UsedInSequence.init(TRI); + std::for_each(front(), std::next(back()), + [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); }); } }; Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -271,6 +271,14 @@ ArrayRef Cond) const; bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg, const MachineRegisterInfo *MRI) const; + + /// Returns an unused callee-saved register which can be used for constructing + /// an outlined call if one exists. Returns 0 otherwise. 
The unused register + /// must + /// - Have been saved in the prospective caller's frame + /// - Not be used within the candidate sequence + /// - Not be live out of the candidate sequence + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; }; /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4851,75 +4851,92 @@ return makeArrayRef(TargetFlags); } - /// Constants defining how certain sequences should be outlined. - /// This encompasses how an outlined function should be called, and what kind of - /// frame should be emitted for that outlined function. - /// - /// \p MachineOutlinerDefault implies that the function should be called with - /// a save and restore of LR to the stack. - /// - /// That is, - /// - /// I1 Save LR OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 Restore LR I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 3 (save + BL + restore) - /// * Frame construction overhead: 1 (ret) - /// * Requires stack fixups? Yes - /// - /// \p MachineOutlinerTailCall implies that the function is being created from - /// a sequence of instructions ending in a return. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> B OUTLINED_FUNCTION I1 - /// RET I2 - /// RET - /// - /// * Call construction overhead: 1 (B) - /// * Frame construction overhead: 0 (Return included in sequence) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerNoLRSave implies that the function should be called using - /// a BL instruction, but doesn't require LR to be saved and restored. This - /// happens when LR is known to be dead. 
- /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 1 (RET) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerThunk implies that the function is being created from - /// a sequence of instructions ending in a call. The outlined function is - /// called with a BL instruction, and the outlined function tail-calls the - /// original call destination. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// BL f I2 - /// B f - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 0 - /// * Requires stack fixups? No - /// +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? Yes +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// RET I2 +/// RET +/// +/// * Call construction overhead: 1 (B) +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. 
+/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 1 (RET) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 0 +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available callee-saved register. This allows +/// us to avoid stack fixups. Note that this outlining variant is compatible +/// with the NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? No enum MachineOutlinerClass { MachineOutlinerDefault, /// Emit a save, restore, call, and return. MachineOutlinerTailCall, /// Only emit a branch. MachineOutlinerNoLRSave, /// Emit a call and return. MachineOutlinerThunk, /// Emit a call and tail-call. + MachineOutlinerRegSave /// Same as default, but save to a register. }; enum MachineOutlinerMBBFlags { @@ -4927,6 +4944,34 @@ HasCalls = 0x4 }; +unsigned +AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + MachineFunction *MF = C.getMF(); + + // We only want callee-saved registers that were saved in the Candidate's + // frame. 
+ const std::vector &CSInfo = + MF->getFrameInfo().getCalleeSavedInfo(); + const AArch64RegisterInfo *ARI = static_cast( + MF->getSubtarget().getRegisterInfo()); + + // Check if there's a 64-bit callee-saved register which was saved in the + // prospective caller's frame, which is not live across the sequence and is + // not used within the sequence. + for (auto &CS : CSInfo) { + unsigned Reg = CS.getReg(); + if (!ARI->isReservedReg(*MF, Reg) && + Reg != AArch64::LR && // LR is not reserved. + AArch64::GPR64RegClass.contains(Reg) && + C.UsedInSequence.available(Reg) && + C.LRU.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { @@ -5011,11 +5056,27 @@ SetCandidateCallInfo(MachineOutlinerNoLRSave, 4); } - // LR is live, so we need to save it to the stack. + // LR is live, so we need to save it. Decide whether it should be saved to + // the stack, or if it can be saved to a register. else { - FrameID = MachineOutlinerDefault; - NumBytesToCreateFrame = 4; - SetCandidateCallInfo(MachineOutlinerDefault, 12); + if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [this](outliner::Candidate &C) { + return findRegisterToSaveLRTo(C); + })) { + // Every candidate has an available callee-saved register for the save. + // We can save LR to a register. + FrameID = MachineOutlinerRegSave; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerRegSave, 12); + } + + else { + // At least one candidate does not have an available callee-saved + // register. We must save LR to the stack. + FrameID = MachineOutlinerDefault; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerDefault, 12); + } } // Check if the range contains a call. These require a save + restore of the @@ -5420,7 +5481,7 @@ MBB.insert(MBB.end(), ret); // Did we have to modify the stack by saving the link register? 
- if (OF.FrameConstructionID == MachineOutlinerNoLRSave) + if (OF.FrameConstructionID != MachineOutlinerDefault) return; // We modified the stack. @@ -5453,13 +5514,41 @@ // We want to return the spot where we inserted the call. MachineBasicBlock::iterator CallPt; - // We have a default call. Save the link register. - MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR) - .addReg(AArch64::SP) - .addImm(-16); - It = MBB.insert(It, STRXpre); + // Instructions for saving and restoring LR around the call instruction we're + // going to insert. + MachineInstr *Save; + MachineInstr *Restore; + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + // FIXME: This logic should be sunk into a target-specific interface so that + // we don't have to recompute the register. + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) + .addReg(AArch64::XZR) + .addReg(AArch64::LR) + .addImm(0); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) + .addReg(AArch64::XZR) + .addReg(Reg) + .addImm(0); + } else { + // We have the default case. Save and restore from SP. + Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(-16); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); + } + + It = MBB.insert(It, Save); It++; // Insert the call. @@ -5468,13 +5557,6 @@ CallPt = It; It++; - // Restore the link register. 
- MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR, RegState::Define) - .addReg(AArch64::SP) - .addImm(16); - It = MBB.insert(It, LDRXpost); - + It = MBB.insert(It, Restore); return CallPt; } Index: test/CodeGen/AArch64/machine-outliner-regsave.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-outliner-regsave.mir @@ -0,0 +1,138 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog \ +# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s +# Check that we save LR to a callee-saved register when possible. +# foo() should use a callee-saved register. However, bar1-3() should not. +--- | + + define void @foo() #0 { + ret void + } + + define void @bar1() #0 { + ret void + } + + define void @bar2() #0 { + ret void + } + + define void @bar3() #0 { + ret void + } + + attributes #0 = { noinline noredzone "no-frame-pointer-elim"="true" } +... +--- +# Make sure that when we outline and a callee-saved register is available, we +# use it to save + restore LR instead of SP. 
+# CHECK: name: foo +# CHECK-DAG: bb.0 +# CHECK-DAG: $x25 = ORRXrs $xzr, $lr, 0 +# CHECK-NEXT: BL +# CHECK-NEXT: $lr = ORRXrs $xzr, $x25, 0 +# CHECK-DAG: bb.1 +# CHECK-DAG: $x25 = ORRXrs $xzr, $lr, 0 +# CHECK-NEXT: BL +# CHECK-NEXT: $lr = ORRXrs $xzr, $x25, 0 +# CHECK-DAG: bb.2 +# CHECK-DAG: $x25 = ORRXrs $xzr, $lr, 0 +# CHECK-NEXT: BL +# CHECK-NEXT: $lr = ORRXrs $xzr, $x25, 0 +name: foo +tracksRegLiveness: true +fixedStack: +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: 0, callee-saved-register: '$x25', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +body: | + bb.0: + liveins: $lr, $w9, $x25 + $x25 = ORRXri $xzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.1: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.2: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + RET undef $lr + +... +--- +# CHECK-LABEL: name: bar1 +# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16 +# CHECK-NEXT: BL +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +name: bar1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $w10 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 2 + bb.1: + RET undef $lr + +... +--- +# CHECK-LABEL: name: bar2 +# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16 +# CHECK-NEXT: BL +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +name: bar2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $w10, $x25 + $x25 = ORRXri $xzr, 1 ; x25 is available here, but we shouldn't use it. 
+ $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 2 + bb.1: + RET undef $lr + +... +--- +# CHECK-LABEL: name: bar3 +# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16 +# CHECK-NEXT: BL +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +name: bar3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $w10, $x25 + $x25 = ORRXri $xzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 1 + $w10 = ORRWri $wzr, 2 + bb.1: + RET undef $lr + Index: test/CodeGen/AArch64/machine-outliner.mir =================================================================== --- test/CodeGen/AArch64/machine-outliner.mir +++ test/CodeGen/AArch64/machine-outliner.mir @@ -48,6 +48,7 @@ tracksRegLiveness: true body: | bb.0: + liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr $sp = frame-setup SUBXri $sp, 16, 0 renamable $x9 = ADRP target-flags(aarch64-page) @bar $x9 = ORRXri $xzr, 1