Index: include/llvm/CodeGen/MachineOutliner.h
===================================================================
--- include/llvm/CodeGen/MachineOutliner.h
+++ include/llvm/CodeGen/MachineOutliner.h
@@ -16,7 +16,9 @@
 #ifndef LLVM_MACHINEOUTLINER_H
 #define LLVM_MACHINEOUTLINER_H
 
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 
 namespace outliner {
 
@@ -92,6 +94,20 @@
   /// Contains all target-specific information for this \p Candidate.
   TargetCostInfo TCI;
 
+  /// Contains physical register liveness information on entry to this
+  /// \p Candidate, i.e. immediately before its first instruction.
+  ///
+  /// This is optionally used by the target to calculate more fine-grained
+  /// cost model information. Only valid after \p initLRU has been called.
+  LiveRegUnits LRUIn;
+
+  /// Contains physical register liveness information on exit from this
+  /// \p Candidate, i.e. immediately after its last instruction.
+  ///
+  /// This is optionally used by the target to calculate more fine-grained
+  /// cost model information. Only valid after \p initLRU has been called.
+  LiveRegUnits LRUOut;
+
   /// Return the number of instructions in this Candidate.
   unsigned getLength() const { return Len; }
 
@@ -121,12 +137,47 @@
             unsigned FunctionIdx)
       : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
         MBB(MBB), FunctionIdx(FunctionIdx) {}
+  Candidate() {}
 
   /// Used to ensure that \p Candidates are outlined in an order that
   /// preserves the start and end indices of other \p Candidates.
   bool operator<(const Candidate &RHS) const {
     return getStartIdx() > RHS.getStartIdx();
   }
+
+  /// Compute physical register liveness on entry to (\p LRUIn) and on exit
+  /// from (\p LRUOut) this \p Candidate.
+  ///
+  /// Used by targets that need this information for cost model calculation.
+  /// If a target does not need this information, then this should not be
+  /// called.
+  void initLRU(const TargetRegisterInfo &TRI) {
+    // Liveness is computed with a single backward walk over MBB. As with
+    // std::reverse_iterator, converting a forward iterator I yields the
+    // reverse-order end of [I, MBB->end()). TailREnd therefore ends the walk
+    // over the instructions after the candidate, and CandidateREnd ends the
+    // walk over the candidate itself.
+    MachineBasicBlock::iterator AfterCandidate = back();
+    ++AfterCandidate;
+    MachineBasicBlock::reverse_iterator TailREnd(AfterCandidate);
+    MachineBasicBlock::reverse_iterator CandidateREnd(front());
+
+    // First, calculate register liveness on exit from this candidate. Start
+    // from the live-outs of MBB and step backward over every instruction that
+    // follows the candidate. Whichever registers are live in LRUOut are the
+    // candidate's live outs.
+    LRUOut.init(TRI);
+    LRUOut.addLiveOuts(*MBB);
+    for (auto I = MBB->rbegin(); I != TailREnd; ++I)
+      LRUOut.stepBackward(*I);
+
+    // Next, calculate register liveness on entry to this candidate. Continue
+    // from LRUOut and step backward over the candidate's own instructions.
+    // Whichever registers are live in LRUIn are the candidate's live ins.
+    LRUIn = LRUOut;
+    for (auto I = TailREnd; I != CandidateREnd; ++I)
+      LRUIn.stepBackward(*I);
+  }
 };
 
 /// The information necessary to create an outlined function for some
Index: lib/CodeGen/MachineOutliner.cpp
===================================================================
--- lib/CodeGen/MachineOutliner.cpp
+++ lib/CodeGen/MachineOutliner.cpp
@@ -66,7 +66,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/IRBuilder.h"
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -238,9 +238,7 @@
 
   /// AArch64 supports the MachineOutliner.
   bool useMachineOutliner() const override { return true; }
 
-  bool
-  canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const;
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                    bool OutlineFromLinkOnceODRs) const override;
   outliner::TargetCostInfo getOutlininingCandidateInfo(
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -4928,42 +4927,76 @@
   HasCalls = 0x4
 };
 
-bool AArch64InstrInfo::canOutlineWithoutLRSave(
-    MachineBasicBlock::iterator &CallInsertionPt) const {
-  // Was LR saved in the function containing this basic block?
-  MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
-  LiveRegUnits LRU(getRegisterInfo());
-  LRU.addLiveOuts(MBB);
-
-  // Get liveness information from the end of the block to the end of the
-  // prospective outlined region.
-  std::for_each(MBB.rbegin(),
-                (MachineBasicBlock::reverse_iterator)CallInsertionPt,
-                [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
-
-  // If the link register is available at this point, then we can safely outline
-  // the region without saving/restoring LR. Otherwise, we must emit a save and
-  // restore.
-  return LRU.available(AArch64::LR);
-}
 
 outliner::TargetCostInfo AArch64InstrInfo::getOutlininingCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
   unsigned SequenceSize =
       std::accumulate(RepeatedSequenceLocs[0].front(),
                       std::next(RepeatedSequenceLocs[0].back()), 0,
                       [this](unsigned Sum, const MachineInstr &MI) {
                         return Sum + getInstSizeInBytes(MI);
                       });
+
+  // Compute liveness information for each candidate.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  for (outliner::Candidate &C : RepeatedSequenceLocs)
+    C.initLRU(TRI);
+
+  // According to the AArch64 Procedure Call Standard, the following are
+  // undefined on entry/exit from a function call:
+  //
+  // * Registers x16, x17, (and thus w16, w17)
+  // * Condition codes (and thus the NZCV register)
+  //
+  // Therefore, if we have a situation like this:
+  //
+  //   I1 (defines, say, x16)
+  //   I2
+  //   ---- Outlining candidate --------
+  //   I3
+  //   ...
+  //   ---- End outlining candidate ----
+  //   Ik (uses x16)
+  //
+  // We cannot outline that candidate. The value of x16 at Ik is not guaranteed
+  // to be the same as in I1 once we insert a call instruction. Similarly, if
+  // x16 was defined inside the outlining candidate region, we cannot say that
+  // the value of x16 at Ik will be the same there.
+  //
+  // Thus, we must erase every candidate that violates any of these conditions.
+  auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
+    auto IsLiveAtBoundary = [&C](unsigned Reg) {
+      return !C.LRUIn.available(Reg) || !C.LRUOut.available(Reg);
+    };
+    return IsLiveAtBoundary(AArch64::W16) ||
+           IsLiveAtBoundary(AArch64::W17) ||
+           IsLiveAtBoundary(AArch64::NZCV);
+  };
+
+  // Erase every candidate that violates the restrictions above. (It could be
+  // true that we have viable candidates, so it's not worth bailing out in the
+  // case that, say, 1 out of 20 candidates violate the restrictions.)
+  RepeatedSequenceLocs.erase(
+      std::remove_if(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+                     CantGuaranteeValueAcrossCall),
+      RepeatedSequenceLocs.end());
+
+  // At this point, we have only "safe" candidates to outline. Figure out frame
+  // + call instruction information.
   unsigned CallID = MachineOutlinerDefault;
   unsigned FrameID = MachineOutlinerDefault;
   unsigned NumBytesForCall = 12;
   unsigned NumBytesToCreateFrame = 4;
 
-  auto DoesntNeedLRSave = [this](outliner::Candidate &I) {
-    return canOutlineWithoutLRSave(I.back());
-  };
+  // Every candidate may have been erased above. The code below reads
+  // RepeatedSequenceLocs[0], so return the default cost information rather
+  // than indexing into an empty vector.
+  // NOTE(review): confirm that the MachineOutliner does not go on to outline
+  // when the candidate list comes back empty.
+  if (RepeatedSequenceLocs.empty())
+    return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
+                                    NumBytesToCreateFrame, CallID, FrameID);
 
   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
 
   // If the last instruction in any candidate is a terminator, then we should
@@ -4983,8 +5016,14 @@
     NumBytesToCreateFrame = 0;
   }
 
-  else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
-                       DoesntNeedLRSave)) {
+  // If the link register is available both on entry to and on exit from every
+  // candidate, then we can safely outline without saving/restoring LR.
+  // Otherwise, we must emit a save and restore.
+  else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+                       [](outliner::Candidate &C) {
+                         return C.LRUIn.available(AArch64::LR) &&
+                                C.LRUOut.available(AArch64::LR);
+                       })) {
     CallID = MachineOutlinerNoLRSave;
     FrameID = MachineOutlinerNoLRSave;
     NumBytesForCall = 4;
Index: test/CodeGen/AArch64/machine-outliner-bad-register.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/machine-outliner-bad-register.mir
@@ -0,0 +1,104 @@
+# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \
+# RUN: -verify-machineinstrs %s -o - | FileCheck %s
+
+# Ensure that we don't outline from regions where x16, x17, or nzcv are live
+# across the outlining candidate. These values are allowed to be clobbered by,
+# say, the linker, in the presence of function calls. Thus, we can't outline
+# these, since the insertion of the outlined call could change the values of
+# these registers.
+--- |
+  ; This function should be the only one with outlining candidates.
+  define void @should_outline(i32 %a) #0 {
+    ret void
+  }
+
+  define void @x16_live() #0 {
+    ret void
+  }
+
+  define void @x17_live() #0 {
+    ret void
+  }
+
+  define void @nzcv_live() #0 {
+    ret void
+  }
+
+  attributes #0 = { noredzone }
+...
+---
+
+name: should_outline
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK: BL
+    liveins: $w8, $wzr
+    $w8 = ORRWri $wzr, 1
+    $w8 = ORRWri $wzr, 2
+    $w8 = ORRWri $wzr, 3
+    $w8 = ORRWri $wzr, 4
+  bb.1:
+    ; CHECK-LABEL: bb.1:
+    ; CHECK: BL
+    liveins: $w8, $wzr
+    $w8 = ORRWri $wzr, 1
+    $w8 = ORRWri $wzr, 2
+    $w8 = ORRWri $wzr, 3
+    $w8 = ORRWri $wzr, 4
+  bb.2:
+    RET undef $lr
+
+...
+---
+
+name: x16_live
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    liveins: $w8, $wzr, $x16
+    $w8 = ORRWri $wzr, 1
+    $w8 = ORRWri $wzr, 2
+    $w8 = ORRWri $wzr, 3
+    $w8 = ORRWri $wzr, 4
+    INLINEASM &nop, 0, 10, implicit-def $x16, 2147483657, $x16(tied-def 3)
+    RET undef $lr
+
+...
+---
+
+name: x17_live
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    liveins: $w8, $wzr
+    $w8 = ORRWri $wzr, 1
+    $w8 = ORRWri $wzr, 2
+    $w8 = ORRWri $wzr, 3
+    $w8 = ORRWri $wzr, 4
+  bb.1:
+    liveins: $x17
+    RET undef $lr
+
+...
+---
+
+name: nzcv_live
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w8, $wzr
+    ; CHECK-LABEL: bb.0:
+    ; CHECK-NOT: BL
+    $w8 = ORRWri $wzr, 1
+    $w8 = ORRWri $wzr, 2
+    $w8 = ORRWri $wzr, 3
+    $w8 = ORRWri $wzr, 4
+  bb.1:
+    liveins: $nzcv
+    RET undef $lr