Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -1554,15 +1554,26 @@ } /// \brief Returns the number of instructions that will be taken to call a - /// function defined by the sequence on the closed interval [ \p StartIt, \p - /// EndIt]. + /// function defined by a collection of closed intervals defining repeated + /// sequences. + /// + /// \param CandidateClass A collection of [Start,End] intervals definining the + /// start and end points of a repeated sequence. /// /// \returns The number of instructions for the call in the first member, /// and a target-defined unsigned describing what type of call to emit in the /// second member. - virtual std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { + virtual std::pair getOutliningCallOverhead( + std::vector< + std::pair> + &CandidateClass) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!"); + } + + /// \brief Returns the number of instructions taken to call a candidate with + /// a given \p CallClass. + virtual unsigned getOutliningCallOverhead(unsigned CallClass) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!"); } Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -851,9 +851,6 @@ if (StringLen < 2) continue; - size_t CallOverhead = 0; - size_t SequenceOverhead = StringLen; - // If this is a beneficial class of candidate, then every one is stored in // this vector. std::vector CandidatesForRepeatedSeq; @@ -874,13 +871,8 @@ MachineBasicBlock::iterator EndIt = Mapper.InstrList[M->SuffixIdx + StringLen - 1]; - // Get the overhead for calling a function for this sequence and any - // target-specified data for how to construct the call. - std::pair CallOverheadPair = - TII.getOutliningCallOverhead(StartIt, EndIt); - CallOverhead += CallOverheadPair.first; CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx, - CallOverheadPair.second); + 0); CandidateClass.emplace_back(std::make_pair(StartIt, EndIt)); // Never visit this leaf again. @@ -888,6 +880,15 @@ } } + size_t SequenceOverhead = StringLen; + + // Get the overall cost of calling every candidate in this class, and the + // way that they should all be outlined. + std::pair CallOverheadPair = + TII.getOutliningCallOverhead(CandidateClass); + size_t CallOverhead = CallOverheadPair.first; + + // Do the same thing for the frame for the candidates. std::pair FrameOverheadPair = TII.getOutliningFrameOverhead(CandidateClass); size_t FrameOverhead = FrameOverheadPair.first; @@ -895,6 +896,7 @@ size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; + // Outlining would produce more instructions than not, so move on. if (NotOutliningCost <= OutliningCost) continue; @@ -907,6 +909,7 @@ // benefit values and save them in the candidate list. for (Candidate &C : CandidatesForRepeatedSeq) { C.Benefit = Benefit; + C.CallClass = CallOverheadPair.second; CandidateList.push_back(C); } @@ -1015,11 +1018,7 @@ F2.OccurrenceCount--; // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C2.StartIdx + C2.Len - 1]; - - F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first; + F2.Benefit += TII.getOutliningCallOverhead(C2.CallClass); // Add back one instance of the sequence. if (F2.Sequence.size() > F2.Benefit) @@ -1042,11 +1041,7 @@ F1.OccurrenceCount--; // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C1.StartIdx + C1.Len - 1]; - - F1.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first; + F1.Benefit += TII.getOutliningCallOverhead(C1.CallClass); // Add back one instance of the sequence. if (F1.Sequence.size() > F1.Benefit) @@ -1148,7 +1143,6 @@ InstructionMapper &Mapper) { bool OutlinedSomething = false; - // Replace the candidates with calls to their respective outlined functions. for (const Candidate &C : CandidateList) { Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -349,10 +349,13 @@ ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; + bool canOutlineWithoutLRSave(MachineBasicBlock &MBB) const; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; - std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const override; + std::pair getOutliningCallOverhead( + std::vector< + std::pair> + &CandidateClass) const override; + unsigned getOutliningCallOverhead(unsigned CallClass) const override; std::pair getOutliningFrameOverhead( std::vector< std::pair> Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -4433,21 +4434,85 @@ } // Constants defining how certain sequences should be outlined. -const unsigned MachineOutlinerDefaultFn = 0; -const unsigned MachineOutlinerTailCallFn = 1; +enum MachineOutlinerClass { + MachineOutlinerDefault, + MachineOutlinerTailCall, + MachineOutlinerNoLRSave +}; + +bool AArch64InstrInfo::canOutlineWithoutLRSave(MachineBasicBlock &MBB) const { + // Was LR saved in the function containing this basic block? + MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + bool CalleeSavedLR = false; + for (auto CalleeSave : CSI) { + if (CalleeSave.getReg() == AArch64::LR) { + CalleeSavedLR = true; + break; + } + } + + if (!CalleeSavedLR) + return false; + + LiveRegUnits LRU(getRegisterInfo()); + LRU.addLiveOuts(MBB); + for (auto I = MBB.rbegin(); I != MBB.rend(); LRU.stepBackward(*I), ++I) { + // Is the link register dead at this point? + if (!LRU.available(AArch64::LR)) + return false; + } + + // The link register is dead throughout the entire block. We can skip saving + // and restoring it. + return true; +} + +unsigned AArch64InstrInfo::getOutliningCallOverhead(unsigned CallClass) const { + unsigned Overhead = 1; + + switch (CallClass) { + default: + llvm_unreachable("Unknown call class!"); + case MachineOutlinerDefault: + Overhead = 3; + break; + case MachineOutlinerTailCall: + case MachineOutlinerNoLRSave: + break; + } + + return Overhead; +} std::pair AArch64InstrInfo::getOutliningCallOverhead( - MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { - // Is this a tail-call? - if (EndIt->isTerminator()) { - // Yes, so we only have to emit a call. Return a cost of 1 + signify that - // this candidate should be tail-called. - return std::make_pair(1, MachineOutlinerTailCallFn); + std::vector> &CandidateClass) const { + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (CandidateClass[0].second->isTerminator()) + return std::make_pair(CandidateClass.size(), MachineOutlinerTailCall); + + unsigned Type = MachineOutlinerNoLRSave; + unsigned NumInstrsToCallFn = 1; + + // Check if it's possible to skip saving LR in every candidate. + // TODO: We should be able to get some more candidates by saving + restoring + // with a scratch register wherever possible. + // TODO: If two candidates can call the same outlined function (for example, + // neither uses the stack), we should allow them to belong to the same + // candidate class, but support different cost models. + for (const auto &Interval : CandidateClass) { + if (!canOutlineWithoutLRSave(*(Interval.first->getParent()))) { + Type = MachineOutlinerDefault; + NumInstrsToCallFn = 3; + break; + } } - // No, so save + restore LR. - return std::make_pair(3, MachineOutlinerDefaultFn); + return std::make_pair(CandidateClass.size() * NumInstrsToCallFn, Type); } std::pair AArch64InstrInfo::getOutliningFrameOverhead( @@ -4456,14 +4521,23 @@ // Is the last instruction in this class a terminator? if (CandidateClass[0].second->isTerminator()) - return std::make_pair(0, MachineOutlinerTailCallFn); + return std::make_pair(0, MachineOutlinerTailCall); + + // No, so we have to add a return to the end. Check if this call is going to + // be NoLRSave. + + // Are each of these NoLRSave calls? + for (const auto &Interval : CandidateClass) { + if (!canOutlineWithoutLRSave(*(Interval.first->getParent()))) + return std::make_pair(1, MachineOutlinerDefault); + } - // No, so we have to add a return to the end. - return std::make_pair(1, MachineOutlinerDefaultFn); + return std::make_pair(1, MachineOutlinerNoLRSave); } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { - return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone) && + !MF.getFunction()->hasAddressTaken(); } AArch64GenInstrInfo::MachineOutlinerInstrType @@ -4505,10 +4579,6 @@ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || MOP.isTargetIndex()) return MachineOutlinerInstrType::Illegal; - - // Don't outline anything that uses the link register. - if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR)) - return MachineOutlinerInstrType::Illegal; } // Does this use the stack? @@ -4583,7 +4653,7 @@ unsigned FrameClass) const { // If this is a tail call outlined function, then there's already a return. - if (FrameClass == MachineOutlinerTailCallFn) + if (FrameClass == MachineOutlinerTailCall) return; // It's not a tail call, so we have to insert the return ourselves. @@ -4591,6 +4661,11 @@ .addReg(AArch64::LR, RegState::Undef); MBB.insert(MBB.end(), ret); + // Did we have to modify the stack by saving the link register? + if (FrameClass == MachineOutlinerNoLRSave) + return; + + // We modified the stack. // Walk over the basic block and fix up all the stack accesses. fixupPostOutline(MBB); } @@ -4604,7 +4679,7 @@ MachineFunction &MF, unsigned CallClass) const { // Are we tail calling? - if (CallClass == MachineOutlinerTailCallFn) { + if (CallClass == MachineOutlinerTailCall) { // If yes, then we can just branch to the label. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B)) @@ -4612,8 +4687,16 @@ return It; } - // We're not tail calling, so we have to save LR before the call and restore - // it after. + // Are we saving the link register? + if (CallClass == MachineOutlinerNoLRSave) { + // No, so just insert the call. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(AArch64::BL)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + return It; + } + + // We have a default call. Save the link register. MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) .addReg(AArch64::SP, RegState::Define) .addReg(AArch64::LR) Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -559,9 +559,12 @@ ArrayRef> getSerializableDirectMachineOperandTargetFlags() const override; - std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const override; + std::pair getOutliningCallOverhead( + std::vector< + std::pair> + &CandidateClass) const override; + + unsigned getOutliningCallOverhead(unsigned CallClass) const override; std::pair getOutliningFrameOverhead( std::vector< Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -10574,15 +10574,19 @@ const unsigned MachineOutlinerDefaultFn = 0; const unsigned MachineOutlinerTailCallFn = 1; +unsigned X86InstrInfo::getOutliningCallOverhead(unsigned CallClass) const { + return 1; +} + std::pair X86InstrInfo::getOutliningCallOverhead( - MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { + std::vector> + &CandidateClass) const { // Is this a tail call? If it is, make note of it. - if (EndIt->isTerminator()) - return std::make_pair(1, MachineOutlinerTailCallFn); + if (CandidateClass[0].second->isTerminator()) + return std::make_pair(CandidateClass.size(), MachineOutlinerTailCallFn); - return std::make_pair(1, MachineOutlinerDefaultFn); + return std::make_pair(CandidateClass.size(), MachineOutlinerDefaultFn); } std::pair X86InstrInfo::getOutliningFrameOverhead( Index: test/CodeGen/AArch64/machine-outliner.mir =================================================================== --- test/CodeGen/AArch64/machine-outliner.mir +++ test/CodeGen/AArch64/machine-outliner.mir @@ -7,26 +7,47 @@ ret i32 0 } - attributes #0 = { noinline noredzone nounwind optnone ssp uwtable } - -# CHECK-LABEL: @OUTLINED_FUNCTION_0 + define void @bar(i32 %a) #0 { + entry: + %a.addr = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + + if.then: + store i32 1, i32* %x, align 4 + br label %if.end + + if.end: + ret void + } + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { noinline noredzone nounwind optnone ssp uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" } + attributes #1 = { nounwind } ... --- # This test ensures that we # - Create outlined functions # - Don't outline anything to do with LR or W30 +# - Save LR when it's not available # # CHECK-LABEL: name: main # CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 # CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 # CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 name: main alignment: 2 tracksRegLiveness: true @@ -78,4 +99,79 @@ %sp = ADDXri %sp, 16, 0 RET undef %lr - \ No newline at end of file + +... +--- +# This test ensures that we can avoid saving LR when it's available. +# CHECK-LABEL: bb.1.if.then: +# CHECK: BL @OUTLINED_FUNCTION_1, implicit-def %lr, implicit %sp +# CHECK-NEXT: %w17 = ORRWri %wzr, 2 +# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def %lr, implicit %sp +# CHECK-NEXT: %w8 = ORRWri %wzr, 0 +name: bar +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: +liveins: + - { reg: '%w0', virtual-reg: '' } +frameInfo: + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 +fixedStack: +stack: + - { id: 0, name: a.addr, type: default, offset: -20, size: 4, alignment: 4, + stack-id: 0, callee-saved-register: '', local-offset: -4, di-variable: '', + di-expression: '', di-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '%lr', di-variable: '', di-expression: '', + di-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '%fp', di-variable: '', di-expression: '', + di-location: '' } +constants: +body: | + bb.0.entry: + successors: %bb.2.if.end(0x40000000), %bb.1.if.then(0x40000000) + liveins: %w0, %lr + + %sp = frame-setup SUBXri %sp, 32, 0 + frame-setup STPXi killed %fp, killed %lr, %sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.2) + %fp = frame-setup ADDXri %sp, 16, 0 + frame-setup CFI_INSTRUCTION def_cfa %w29, 16 + frame-setup CFI_INSTRUCTION offset %w30, -8 + frame-setup CFI_INSTRUCTION offset %w29, -16 + STURWi killed %w0, %fp, -4 :: (store 4 into %stack.0.a.addr) + %w8 = LDURWi %fp, -4 :: (load 4 from %stack.0.a.addr) + CBNZW killed %w8, %bb.2.if.end + + bb.1.if.then: + successors: %bb.2.if.end(0x80000000) + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 2 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w8 = ORRWri %wzr, 0 + + bb.2.if.end: + %fp, %lr = LDPXi %sp, 2 :: (load 8 from %stack.3), (load 8 from %stack.2) + %sp = ADDXri %sp, 32, 0 + RET undef %lr + +... + +# CHECK-LABEL: name: OUTLINED_FUNCTION_0 +# CHECK=LABEL: name: OUTLINED_FUNCTION_1 \ No newline at end of file