Index: include/llvm/CodeGen/MachineOutliner.h =================================================================== --- include/llvm/CodeGen/MachineOutliner.h +++ include/llvm/CodeGen/MachineOutliner.h @@ -16,7 +16,9 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" namespace outliner { @@ -78,7 +80,7 @@ // The last instruction in this \p Candidate. MachineBasicBlock::iterator LastInst; - // The basic block that contains this Candidate. + // The basic block that contains this \p Candidate. MachineBasicBlock *MBB; public: @@ -92,6 +94,10 @@ /// Contains all target-specific information for this \p Candidate. TargetCostInfo TCI; + /// Contains any liveness information necessary for evaluating the cost of + /// this Candidate. + LiveRegUnits LRU; + /// Return the number of instructions in this Candidate. unsigned getLength() const { return Len; } @@ -121,12 +127,22 @@ unsigned FunctionIdx) : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), MBB(MBB), FunctionIdx(FunctionIdx) {} + Candidate() {} /// Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. bool operator<(const Candidate &RHS) const { return getStartIdx() > RHS.getStartIdx(); } + + /// Compute register liveness up to the end of the repeated sequence. 
+ void initLRU(const TargetRegisterInfo &TRI) { + LRU.init(TRI); + LRU.addLiveOuts(*MBB); + std::for_each(MBB->rbegin(), + (MachineBasicBlock::reverse_iterator)front(), + [this](MachineInstr &MI) { LRU.stepBackward(MI);}); + } }; /// The information necessary to create an outlined function for some Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -66,7 +66,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -238,9 +238,6 @@ /// AArch64 supports the MachineOutliner. 
bool useMachineOutliner() const override { return true; } - - bool - canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const; bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override; outliner::TargetCostInfo getOutliningCandidateInfo( Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -4928,42 +4927,67 @@ HasCalls = 0x4 }; -bool AArch64InstrInfo::canOutlineWithoutLRSave( - MachineBasicBlock::iterator &CallInsertionPt) const { - // Was LR saved in the function containing this basic block? - MachineBasicBlock &MBB = *(CallInsertionPt->getParent()); - LiveRegUnits LRU(getRegisterInfo()); - LRU.addLiveOuts(MBB); - - // Get liveness information from the end of the block to the end of the - // prospective outlined region. - std::for_each(MBB.rbegin(), - (MachineBasicBlock::reverse_iterator)CallInsertionPt, - [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); }); - - // If the link register is available at this point, then we can safely outline - // the region without saving/restoring LR. Otherwise, we must emit a save and - // restore. 
- return LRU.available(AArch64::LR); -} outliner::TargetCostInfo AArch64InstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { unsigned SequenceSize = - std::accumulate(RepeatedSequenceLocs[0].front(), - std::next(RepeatedSequenceLocs[0].back()), 0, - [this](unsigned Sum, const MachineInstr &MI) { - return Sum + getInstSizeInBytes(MI); - }); + std::accumulate(RepeatedSequenceLocs[0].front(), + std::next(RepeatedSequenceLocs[0].back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Compute liveness information for each candidate. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + [&TRI](outliner::Candidate &C){ + C.initLRU(TRI); + }); + + // According to + // infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf, + // the following are undefined on entry/exit from a function call: + // + // * Registers x16, x17, (and thus w16, w17) + // * Condition codes (and thus the NZCV register) + // + // Therefore, if we have a situation like this: + // + // I1 (defines, say, x16) + // I2 + // ---- Outlining candidate -------- + // I3 + // ... + // ---- End outlining candidate ---- + // Ik (uses x16) + // + // We cannot outline that candidate. The value of x16 at Ik is not guaranteed + // to be the same as in I1 once we insert a call instruction. Similarly, if + // x16 was defined inside the outlining candidate region, we cannot say that + // the value of x16 at Ik will be the same there. + // + // Thus, we must erase every candidate that violates any of these conditions. + auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) { + LiveRegUnits LRU = C.LRU; + return !LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) || + !LRU.available(AArch64::NZCV); + }; + + // Erase every candidate that violates the restrictions above. 
(It could be + // true that we have viable candidates, so it's not worth bailing out in the + // case that, say, 1 out of 20 candidates violates the restrictions.) + RepeatedSequenceLocs.erase( + std::remove_if(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // At this point, we have only "safe" candidates to outline. Figure out frame + // + call instruction information. unsigned CallID = MachineOutlinerDefault; unsigned FrameID = MachineOutlinerDefault; unsigned NumBytesForCall = 12; unsigned NumBytesToCreateFrame = 4; - - auto DoesntNeedLRSave = [this](outliner::Candidate &I) { - return canOutlineWithoutLRSave(I.back()); - }; - unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); // If the last instruction in any candidate is a terminator, then we should @@ -4983,8 +5007,11 @@ NumBytesToCreateFrame = 0; } - else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), - DoesntNeedLRSave)) { + else if (std::all_of(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + [](outliner::Candidate &C) { + return C.LRU.available(AArch64::LR); + })) { CallID = MachineOutlinerNoLRSave; FrameID = MachineOutlinerNoLRSave; NumBytesForCall = 4; @@ -5007,8 +5034,11 @@ RepeatedSequenceLocs[0].back()->isCall()) NumBytesToCreateFrame += 8; - return outliner::TargetCostInfo(SequenceSize, NumBytesForCall, - NumBytesToCreateFrame, CallID, FrameID); + return outliner::TargetCostInfo(SequenceSize, + NumBytesForCall, + NumBytesToCreateFrame, + CallID, + FrameID); } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -11163,7 +11163,7 @@ ); return outliner::TargetCostInfo(SequenceSize, 1, 1, MachineOutlinerDefault, - MachineOutlinerDefault); + MachineOutlinerDefault); } bool 
X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, Index: test/CodeGen/AArch64/machine-outliner-bad-register.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-outliner-bad-register.mir @@ -0,0 +1,67 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +# Ensure that we don't outline from regions where x16, x17, or nzcv are live +# across the outlining candidate. These values are allowed to be clobbered by, +# say, the linker, in the presence of function calls. Thus, we can't actually +# outline these in the presence of a BL. +--- | + define void @bar(i32 %a) #0 { + ret void + } + + attributes #0 = { noredzone } +... +--- + +name: bar +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w0, $w8, $w17 + renamable $w16 = ORRWrs $wzr, killed renamable $w0, 0, implicit-def $x16 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w0 = ORRWri $wzr, 4 + INLINEASM &nop, 0, 10, implicit-def $x16, 2147483657, $x16(tied-def 3) + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK: BL + liveins: $w0, $w8 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w0 = ORRWri $wzr, 4 + bb.2: + ; CHECK-LABEL: bb.2: + ; CHECK: BL + liveins: $w0, $w8 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w0 = ORRWri $wzr, 4 + bb.3: + ; CHECK-LABEL: bb.3: + ; CHECK-NOT: BL + liveins: $w0, $w8 + renamable $w17 = ORRWrs $wzr, killed renamable $w0, 0, implicit-def $x17 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w0 = ORRWri $wzr, 4 + INLINEASM &nop, 0, 10, implicit-def $x17, 2147483657, $x17(tied-def 3) + bb.4: + ; CHECK-LABEL: bb.4: + ; CHECK-NOT: BL + liveins: $w0, $w8, $wzr + $wzr = SUBSWri killed renamable $w8, 10, 0, implicit-def $nzcv + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w0 = ORRWri $wzr, 4 + Bcc 
10, %bb.5, implicit killed $nzcv + bb.5: + RET undef $lr