[MachineOutliner][AArch64] Optional tail-call-only outlining pre-pass

Adds an OnlyTailCalls mode to the MachineOutliner. When
-aggressive-tail-call-outlining or -aggressive-tail-call-outlining-only is
passed, runOnModule first calls doOutline with OnlyTailCalls set; functions
created in that mode get an AGGRESSIVE_TAIL_CALL_ infix in their name. With
the -only flag the pass returns right after that run; otherwise the regular
outlining (and any reruns) follows with OnlyTailCalls cleared.

TargetInstrInfo gains overloads of getOutliningCandidateInfo and
getOutliningType that take the flag; their default implementations forward to
the existing overloads. AArch64 implements them:
  * getOutliningCandidateInfo clears the candidate list and returns an empty
    OutlinedFunction when the chosen frame is not MachineOutlinerTailCall.
  * getOutliningType skips the explicit LR/W30 operand check and returns
    Legal ahead of the W30 read/modify check.

NOTE(review): in OnlyTailCalls mode getOutliningType returns Legal before
every check that follows the isPosition() test; only the W30 check is visible
in this patch's context, so confirm the remaining ones are safe to skip.

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1807,6 +1807,24 @@
         "Target didn't implement TargetInstrInfo::insertOutlinedCall!");
   }
 
+  /// Overload of getOutliningCandidateInfo() that also receives the
+  /// outliner's \p OnlyTailCalls mode. The default implementation ignores
+  /// \p OnlyTailCalls and forwards to the single-argument overload.
+  virtual outliner::OutlinedFunction getOutliningCandidateInfo(
+      std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+      bool OnlyTailCalls) const {
+    return getOutliningCandidateInfo(RepeatedSequenceLocs);
+  }
+
+  /// Overload of getOutliningType() that also receives the outliner's
+  /// \p OnlyTailCalls mode. The default implementation ignores
+  /// \p OnlyTailCalls and forwards to the two-argument overload.
+  virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                               unsigned Flags,
+                                               bool OnlyTailCalls) const {
+    return getOutliningType(MIT, Flags);
+  }
+
   /// Return true if the function can safely be outlined from.
   /// A function \p MF is considered safe for outlining if an outlined function
   /// produced from instructions in F will produce a program which produces the
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -105,6 +105,17 @@
     cl::desc(
         "Number of times to rerun the outliner after the initial outline"));
 
+static cl::opt<bool> AggressiveTailCallOutlining(
+    "aggressive-tail-call-outlining", cl::init(false), cl::Hidden,
+    cl::desc("Perform an extra run of the machine outliner for tail calls "
+             "(default = off)"));
+
+static cl::opt<bool> AggressiveTailCallOutliningOnly(
+    "aggressive-tail-call-outlining-only", cl::init(false), cl::Hidden,
+    cl::desc("Like -aggressive-tail-call-outlining, but stop outlining after "
+             "outlining the tail calls (ie bail immediately): Meant for "
+             "testing purposes. (default = off)"));
+
 namespace {
 
 /// Represents an undefined index in the suffix tree.
@@ -647,6 +658,9 @@
   // than one illegal number per range.
   bool AddedIllegalLastTime = false;
 
+  /// Used to toggle Tail Call Only mode.
+  bool OnlyTailCalls = false;
+
   /// Maps \p *It to a legal integer.
   ///
   /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
@@ -779,7 +793,7 @@
 
     for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) {
       // Keep track of where this instruction is in the module.
-      switch (TII.getOutliningType(It, Flags)) {
+      switch (TII.getOutliningType(It, Flags, OnlyTailCalls)) {
       case InstrType::Illegal:
         mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
                              InstrListForMBB);
@@ -859,6 +873,9 @@
   /// Set when the pass is constructed in TargetPassConfig.
   bool RunOnAllFunctions = true;
 
+  /// Used to toggle Tail Call Only mode.
+  bool OnlyTailCalls = false;
+
   StringRef getPassName() const override { return "Machine Outliner"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1087,7 +1104,7 @@
         CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
 
     OutlinedFunction OF =
-        TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
+        TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq, OnlyTailCalls);
 
     // If we deleted too many candidates, then there's nothing worth outlining.
     // FIXME: This should take target-specified instruction sizes into account.
@@ -1111,6 +1128,8 @@
   // FIXME: We should have a better naming scheme. This should be stable,
   // regardless of changes to the outliner's cost model/traversal order.
   std::string FunctionName = "OUTLINED_FUNCTION_";
+  if (OnlyTailCalls)
+    FunctionName += "AGGRESSIVE_TAIL_CALL_";
   if (OutlineRepeatedNum > 0)
     FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
   FunctionName += std::to_string(Name);
@@ -1489,12 +1508,23 @@
   if (M.empty())
     return false;
 
+  bool Changed = false;
+  if (AggressiveTailCallOutlining || AggressiveTailCallOutliningOnly) {
+    // Number to append to the current tail-call-only outlined function.
+    unsigned OutlinedTailCallFunctionNum = 0;
+    OnlyTailCalls = true;
+    Changed = doOutline(M, OutlinedTailCallFunctionNum);
+    if (AggressiveTailCallOutliningOnly)
+      return Changed;
+  }
+  OnlyTailCalls = false;
+
   // Number to append to the current outlined function.
   unsigned OutlinedFunctionNum = 0;
 
   OutlineRepeatedNum = 0;
   if (!doOutline(M, OutlinedFunctionNum))
-    return false;
+    return Changed;
 
   for (unsigned I = 0; I < OutlinerReruns; ++I) {
     OutlinedFunctionNum = 0;
@@ -1532,6 +1562,7 @@
   // it here.
   OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
   InstructionMapper Mapper;
+  Mapper.OnlyTailCalls = OnlyTailCalls;
 
   // Prepare instruction mappings for the suffix tree.
   populateMapper(Mapper, M, MMI);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -258,10 +258,16 @@
 
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                    bool OutlineFromLinkOnceODRs) const override;
+  outliner::OutlinedFunction getOutliningCandidateInfo(
+      std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+      bool OnlyTailCalls) const override;
+  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                       unsigned Flags,
+                                       bool OnlyTailCalls) const override;
   outliner::OutlinedFunction getOutliningCandidateInfo(
       std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
-  outliner::InstrType
-  getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
+  outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+                                       unsigned Flags) const override;
   bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                               unsigned &Flags) const override;
   void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5695,6 +5695,13 @@
 
 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+  return getOutliningCandidateInfo(RepeatedSequenceLocs,
+                                   /*OnlyTailCalls=*/false);
+}
+
+outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+    bool OnlyTailCalls) const {
   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
   unsigned SequenceSize =
       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
@@ -6075,6 +6082,13 @@
     }
   }
 
+  // If we are aggressively outlining all tail calls prior to the first outline
+  // pass, return here prior to the CFICount check.
+  if (FrameID != MachineOutlinerTailCall && OnlyTailCalls) {
+    RepeatedSequenceLocs.clear();
+    return outliner::OutlinedFunction();
+  }
+
   // If we have CFI instructions, we can only outline if the outlined section
   // can be a tail call
   if (FrameID != MachineOutlinerTailCall && CFICount > 0)
@@ -6179,6 +6193,12 @@
 outliner::InstrType
 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                    unsigned Flags) const {
+  return getOutliningType(MIT, Flags, /*OnlyTailCalls=*/false);
+}
+
+outliner::InstrType
+AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+                                   unsigned Flags, bool OnlyTailCalls) const {
   MachineInstr &MI = *MIT;
   MachineBasicBlock *MBB = MI.getParent();
   MachineFunction *MF = MBB->getParent();
@@ -6236,6 +6256,12 @@
         MOP.isTargetIndex())
       return outliner::InstrType::Illegal;
 
+    // If we are in OnlyTailCalls mode, ignore the presence of W30 register.
+    // This improves outlining of tail calls by one or two instructions over the
+    // CFI outlining alone.
+    if (OnlyTailCalls)
+      continue;
+
     // If it uses LR or W30 explicitly, then don't touch it.
     if (MOP.isReg() && !MOP.isImplicit() &&
         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
@@ -6313,6 +6339,9 @@
   if (MI.isPosition())
     return outliner::InstrType::Illegal;
 
+  if (OnlyTailCalls)
+    return outliner::InstrType::Legal;
+
   // Don't touch the link register or W30.
   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-aggressive-tail.mir b/llvm/test/CodeGen/AArch64/machine-outliner-aggressive-tail.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-aggressive-tail.mir
@@ -0,0 +1,134 @@
+#RUN: llc -mtriple aarch64--- -verify-machineinstrs -run-pass=machine-outliner \
+#RUN: -aggressive-tail-call-outlining -o - %s | FileCheck %s
+
+#RUN: llc -mtriple aarch64--- -verify-machineinstrs -run-pass=machine-outliner \
+#RUN: -aggressive-tail-call-outlining-only -o - %s | FileCheck %s
+
+#CHECK: name: OUTLINED_FUNCTION_AGGRESSIVE_TAIL_CALL_0
+#CHECK: LDPXi $sp, 4
+#CHECK-NEXT: LDPXi $sp, 2
+#CHECK-NEXT: LDRXpost $sp, 48
+#CHECK-NEXT: RET
+
+--- |
+  define void @a(i32 %a, i32 %b, i32 %c, i32 %d) { unreachable }
+  define void @b(i32 %a, i32 %b, i32 %c, i32 %d) { unreachable }
+  define void @c(i32 %a, i32 %b, i32 %c, i32 %d) { unreachable }
+  declare void @z(i32, i32, i32, i32)
+...
+---
+name: a
+tracksRegLiveness: true
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2, $w3, $x22, $x20, $x21, $lr, $x19
+    early-clobber $sp = frame-setup STRXpre killed $x22, $sp, -48 :: (store 8)
+    frame-setup STPXi killed $x21, killed $x20, $sp, 2 :: (store 8), (store 8)
+    frame-setup STPXi killed $x19, killed $lr, $sp, 4 :: (store 8), (store 8)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 48
+    frame-setup CFI_INSTRUCTION offset $w30, -8
+    frame-setup CFI_INSTRUCTION offset $w19, -16
+    frame-setup CFI_INSTRUCTION offset $w20, -24
+    frame-setup CFI_INSTRUCTION offset $w21, -32
+    frame-setup CFI_INSTRUCTION offset $w22, -48
+    $w19 = ORRWrs $wzr, $w3, 0
+    $w20 = ORRWrs $wzr, $w2, 0
+    $w21 = ORRWrs $wzr, $w1, 0
+    $w22 = ORRWrs $wzr, $w0, 0
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWri $wzr, 0
+    $w1 = ORRWri $wzr, 1984
+    $w2 = ORRWri $wzr, 1
+    $w3 = ORRWri $wzr, 1920
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWrs $wzr, killed $w22, 0
+    $w1 = ORRWrs $wzr, killed $w21, 0
+    $w2 = ORRWrs $wzr, killed $w20, 0
+    $w3 = ORRWrs $wzr, killed $w19, 0
+    BL @z, csr_aarch64_aapcs
+    $x19, $lr = frame-destroy LDPXi $sp, 4 :: (load 8), (load 8)
+    $x21, $x20 = frame-destroy LDPXi $sp, 2 :: (load 8), (load 8)
+    early-clobber $sp, $x22 = frame-destroy LDRXpost $sp, 48 :: (load 8)
+    RET undef $lr
+
+...
+---
+name: b
+tracksRegLiveness: true
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2, $w3, $x22, $x20, $x21, $lr, $x19
+    early-clobber $sp = frame-setup STRXpre killed $x22, $sp, -48 :: (store 8)
+    frame-setup STPXi killed $x21, killed $x20, $sp, 2 :: (store 8), (store 8)
+    frame-setup STPXi killed $x19, killed $lr, $sp, 4 :: (store 8), (store 8)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 48
+    frame-setup CFI_INSTRUCTION offset $w30, -8
+    frame-setup CFI_INSTRUCTION offset $w19, -16
+    frame-setup CFI_INSTRUCTION offset $w20, -24
+    frame-setup CFI_INSTRUCTION offset $w21, -32
+    frame-setup CFI_INSTRUCTION offset $w22, -48
+    $w19 = ORRWrs $wzr, $w3, 0
+    $w20 = ORRWrs $wzr, $w2, 0
+    $w21 = ORRWrs $wzr, $w1, 0
+    $w22 = ORRWrs $wzr, $w0, 0
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWri $wzr, 1920
+    $w1 = ORRWri $wzr, 1
+    $w2 = ORRWri $wzr, 1984
+    $w3 = ORRWri $wzr, 0
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWrs $wzr, killed $w22, 0
+    $w1 = ORRWrs $wzr, killed $w21, 0
+    $w2 = ORRWrs $wzr, killed $w20, 0
+    $w3 = ORRWrs $wzr, killed $w19, 0
+    BL @z, csr_aarch64_aapcs
+    $x19, $lr = frame-destroy LDPXi $sp, 4 :: (load 8), (load 8)
+    $x21, $x20 = frame-destroy LDPXi $sp, 2 :: (load 8), (load 8)
+    early-clobber $sp, $x22 = frame-destroy LDRXpost $sp, 48 :: (load 8)
+    RET undef $lr
+
+...
+
+...
+---
+name: c
+tracksRegLiveness: true
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2, $w3, $x22, $x20, $x21, $lr, $x19
+    early-clobber $sp = frame-setup STRXpre killed $x22, $sp, -48 :: (store 8)
+    frame-setup STPXi killed $x21, killed $x20, $sp, 2 :: (store 8), (store 8)
+    frame-setup STPXi killed $x19, killed $lr, $sp, 4 :: (store 8), (store 8)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 48
+    frame-setup CFI_INSTRUCTION offset $w30, -8
+    frame-setup CFI_INSTRUCTION offset $w19, -16
+    frame-setup CFI_INSTRUCTION offset $w20, -24
+    frame-setup CFI_INSTRUCTION offset $w21, -32
+    frame-setup CFI_INSTRUCTION offset $w22, -48
+    $w19 = ORRWrs $wzr, $w3, 0
+    $w20 = ORRWrs $wzr, $w2, 0
+    $w21 = ORRWrs $wzr, $w1, 0
+    $w22 = ORRWrs $wzr, $w0, 0
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWri $wzr, 1920
+    $w1 = ORRWri $wzr, 1
+    $w2 = ORRWri $wzr, 1984
+    $w3 = ORRWri $wzr, 0
+    BL @z, csr_aarch64_aapcs
+    $w0 = ORRWrs $wzr, killed $w22, 0
+    $w1 = ORRWrs $wzr, killed $w21, 0
+    $w2 = ORRWrs $wzr, killed $w20, 0
+    $w3 = ORRWrs $wzr, killed $w19, 0
+    BL @z, csr_aarch64_aapcs
+    $x19, $lr = frame-destroy LDPXi $sp, 4 :: (load 8), (load 8)
+    $x21, $x20 = frame-destroy LDPXi $sp, 2 :: (load 8), (load 8)
+    early-clobber $sp, $x22 = frame-destroy LDRXpost $sp, 48 :: (load 8)
+    RET undef $lr
+
+...