Index: include/llvm/CodeGen/MachineOutliner.h =================================================================== --- include/llvm/CodeGen/MachineOutliner.h +++ include/llvm/CodeGen/MachineOutliner.h @@ -16,7 +16,9 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" namespace llvm { namespace outliner { @@ -91,6 +93,13 @@ /// Contains all target-specific information for this \p Candidate. TargetCostInfo TCI; + /// Contains physical register liveness information for the MBB containing + /// this \p Candidate. + /// + /// This is optionally used by the target to calculate more fine-grained + /// cost model information. + LiveRegUnits LRU; + /// Return the number of instructions in this Candidate. unsigned getLength() const { return Len; } @@ -120,12 +129,31 @@ unsigned FunctionIdx) : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), MBB(MBB), FunctionIdx(FunctionIdx) {} + Candidate() {} /// Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. bool operator<(const Candidate &RHS) const { return getStartIdx() > RHS.getStartIdx(); } + + /// Compute the registers that are live across this Candidate. + /// Used by targets that need this information for cost model calculation. + /// If a target does not need this information, then this should not be + /// called. + void initLRU(const TargetRegisterInfo &TRI) { + LRU.init(TRI); + LRU.addLiveOuts(*MBB); + + // We want to know liveness across the candidate, so we need to also look + // at the liveins to the block. + LRU.addLiveIns(*MBB); + + // Compute liveness from the end of the block up to the beginning of the + // outlining candidate. 
+ std::for_each(MBB->rbegin(), ++(MachineBasicBlock::reverse_iterator)front(), + [this](MachineInstr &MI) { LRU.stepBackward(MI); }); + } }; /// The information necessary to create an outlined function for some Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -65,7 +65,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -238,9 +238,6 @@ /// AArch64 supports the MachineOutliner. bool useMachineOutliner() const override { return true; } - - bool - canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const; bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override; outliner::TargetCostInfo getOutlininingCandidateInfo( Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -4928,25 +4927,6 @@ HasCalls = 0x4 }; -bool AArch64InstrInfo::canOutlineWithoutLRSave( - MachineBasicBlock::iterator &CallInsertionPt) const { - // Was LR saved in the function containing this basic block? 
- MachineBasicBlock &MBB = *(CallInsertionPt->getParent()); - LiveRegUnits LRU(getRegisterInfo()); - LRU.addLiveOuts(MBB); - - // Get liveness information from the end of the block to the end of the - // prospective outlined region. - std::for_each(MBB.rbegin(), - (MachineBasicBlock::reverse_iterator)CallInsertionPt, - [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); }); - - // If the link register is available at this point, then we can safely outline - // the region without saving/restoring LR. Otherwise, we must emit a save and - // restore. - return LRU.available(AArch64::LR); -} - outliner::TargetCostInfo AArch64InstrInfo::getOutlininingCandidateInfo( std::vector &RepeatedSequenceLocs) const { @@ -4961,8 +4941,38 @@ unsigned NumBytesForCall = 12; unsigned NumBytesToCreateFrame = 4; - auto DoesntNeedLRSave = - [this](outliner::Candidate &I) {return canOutlineWithoutLRSave(I.back());}; + // Compute liveness information for each candidate. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&TRI](outliner::Candidate &C) { C.initLRU(TRI); }); + + // According to the AArch64 Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Registers x16, x17 (and thus w16, w17) + // * Condition codes (and thus the NZCV register) + // + // Because of this, we can't outline any sequence of instructions where + // one + // of these registers is live into/across it. Thus, we need to delete + // those + // candidates. + auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) { + LiveRegUnits LRU = C.LRU; + return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) || + !LRU.available(AArch64::NZCV)); + }; + + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violates the restrictions.) 
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); @@ -4983,8 +4993,15 @@ NumBytesToCreateFrame = 0; } - else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), - DoesntNeedLRSave)) { + // Make sure that LR isn't live on entry to this candidate. The only + // instructions that use LR that could possibly appear in a repeated sequence + // are calls. Therefore, we only have to check and see if LR is dead on entry + // to (or exit from) some candidate. + else if (std::all_of(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + [](outliner::Candidate &C) { + return C.LRU.available(AArch64::LR); + })) { CallID = MachineOutlinerNoLRSave; FrameID = MachineOutlinerNoLRSave; NumBytesForCall = 4; Index: test/CodeGen/AArch64/machine-outliner-bad-register.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-outliner-bad-register.mir @@ -0,0 +1,190 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +# Ensure that we don't outline from regions where x16, x17, or nzcv are live +# across the outlining candidate. These values are allowed to be clobbered by, +# say, the linker, in the presence of function calls. Thus, we can't outline +# these, since the insertion of the outlined call could change the values of +# these registers. +--- | + ; No problematic register appears at all. Safe for outlining. + define void @reg_never_defined() #0 { ret void } + + ; A problematic register is live, but after the candidate. Safe for outlining. 
+ define void @reg_defined_after_candidate() #0 { ret void } + + ; A problematic register is live before the candidate, but killed before + ; entry to the candidate. Safe for outlining. + define void @reg_killed_before_candidate() #0 { ret void } + + ; Ensure that we never outline when any of the problematic registers we care + ; about are live across the outlining candidate. + define void @x16_live() #0 { ret void } + define void @x17_live() #0 { ret void } + define void @nzcv_live() #0 { ret void } + + ; Test a combination of the above behaviours. + ; [candidate] (1) + ; - define a bad register - + ; [candidate] (2) + ; - kill the bad register - + ; [candidate] (3) + ; + ; (1) and (3) should be outlined, while (2) should not be outlined. + define void @multiple_ranges() #0 { ret void } + + attributes #0 = { noredzone } +... +--- + +# There should be two calls to outlined functions here, since we haven't tripped +# any of the cases above. +name: reg_never_defined +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK: BL + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.2: + RET undef $lr +... 
+--- + +name: reg_killed_before_candidate +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + liveins: $w8, $wzr, $x16 + dead $x16 = ORRXri $x8, 6 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + RET undef $lr +... +--- + +name: x16_live +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w8, $wzr, $x16 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + liveins: $x16 + RET undef $lr +... +--- + +name: x17_live +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w8, $wzr, $x17 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $w8 = ORRWri $w17, 5 + RET undef $lr +... +--- + +name: nzcv_live +tracksRegLiveness: true +body: | + bb.0: + liveins: $w8, $wzr, $nzcv + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + liveins: $nzcv + RET undef $lr +... +--- + +name: multiple_ranges +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w8, $wzr + ; FIXME: It should be possible to outline this if we keep track of liveness + ; specifically within the candidate. Right now, the outliner tracks liveness + ; from the end of the block up to the beginning of the candidate. If we + ; extend that such that we also consider liveness from the end of the + ; candidate to the beginning of the candidate, we can outline the sequence + ; here safely. For now, the ORRXri after the candidate blocks this + ; behaviour. 
+ $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $x16 = ORRXri $x8, 5, implicit-def $x16 + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK-NOT: BL + liveins: $w8, $x16 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $x8 = ORRXri $x16, 5 + bb.2: + ; CHECK-LABEL: bb.2: + ; CHECK: BL + liveins: $w8 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.3: + liveins: $w8 + RET undef $lr +... +---