Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -1518,8 +1518,8 @@ /// \brief Return how many instructions would be saved by outlining a /// sequence containing \p SequenceSize instructions that appears /// \p Occurrences times in a module. - virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences) - const { + virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, + bool CanBeTailCall) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningBenefit!"); } @@ -1531,7 +1531,7 @@ /// shouldn't actually impact the outlining result. enum MachineOutlinerInstrType {Legal, Illegal, Invisible}; - /// Return true if the instruction is legal to outline. + /// Returns how or if \p MI should be outlined. virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningType!"); @@ -1541,7 +1541,8 @@ /// This may be empty, in which case no epilogue or return statement will be /// emitted. virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF) const { + MachineFunction &MF, + bool IsTailCall) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); } @@ -1551,8 +1552,8 @@ /// implemented by the target. virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &It, MachineFunction &MF) - const { + MachineBasicBlock::iterator &It, MachineFunction &MF, + bool IsTailCall) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } @@ -1560,14 +1561,15 @@ /// Insert a custom prologue for outlined functions. /// This may be empty, in which case no prologue will be emitted. virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF) const { + MachineFunction &MF, + bool IsTailCall) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); } /// Return true if the function can safely be outlined from. /// By default, this means that the function has no red zone. - virtual bool isFunctionSafeToOutlineFrom(MachineFunction &F) const { + virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); } Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -516,6 +516,10 @@ public: + unsigned operator[](const size_t i) const { + return Str[i]; + } + /// \brief Return a substring of the tree with maximum benefit if such a /// substring exists. /// @@ -800,11 +804,13 @@ /// The number of instructions this function would save. unsigned Benefit = 0; + bool IsTailCall = false; + OutlinedFunction(size_t Name, size_t OccurrenceCount, const std::vector &Sequence, - unsigned Benefit) + unsigned Benefit, bool IsTailCall) : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), - Benefit(Benefit) + Benefit(Benefit), IsTailCall(IsTailCall) {} }; @@ -1009,7 +1015,9 @@ /// \returns The length of the longest candidate found. 0 if there are none. unsigned buildCandidateList(std::vector &CandidateList, std::vector &FunctionList, - SuffixTree &ST, const TargetInstrInfo &TII); + SuffixTree &ST, + InstructionMapper &Mapper, + const TargetInstrInfo &TII); /// \brief Remove any overlapping candidates that weren't handled by the /// suffix tree's pruning method. @@ -1136,7 +1144,8 @@ // is being removed. F2.OccurrenceCount--; F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(), - F2.OccurrenceCount + F2.OccurrenceCount, + F2.IsTailCall ); // Mark C2 as not in the list. @@ -1155,6 +1164,7 @@ MachineOutliner::buildCandidateList(std::vector &CandidateList, std::vector &FunctionList, SuffixTree &ST, + InstructionMapper &Mapper, const TargetInstrInfo &TII) { std::vector CandidateSequence; // Current outlining candidate. @@ -1163,7 +1173,8 @@ // Function for maximizing query in the suffix tree. // This allows us to define more fine-grained types of things to outline in // the target without putting target-specific info in the suffix tree. - auto BenefitFn = [&TII](const SuffixTreeNode &Curr, size_t StringLen) { + auto BenefitFn = [&TII, &ST, &Mapper](const SuffixTreeNode &Curr, + size_t StringLen) { // Any leaf whose parent is the root only has one occurrence. if (Curr.Parent->isRoot()) @@ -1180,7 +1191,16 @@ if (Occurrences < 2) return 0u; - return TII.getOutliningBenefit(StringLen, Occurrences); + // Check if the last instruction in the sequence is a return. + MachineInstr *LastInstr = + Mapper.IntegerInstructionMap[ST[Curr.SuffixIdx + StringLen - 1]]; + assert(LastInstr && "Last instruction in sequence was unmapped!"); + + // The only way a terminator could be mapped as legal is if it was safe to + // tail call. + bool IsTailCall = LastInstr->isTerminator(); + + return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall); }; // Repeatedly query the suffix tree for the substring that maximizes @@ -1204,10 +1224,19 @@ if (CandidateSequence.size() > MaxCandidateLen) MaxCandidateLen = CandidateSequence.size(); + MachineInstr *LastInstr = + Mapper.IntegerInstructionMap[CandidateSequence.back()]; + assert(LastInstr && "Last instruction in sequence was unmapped!"); + + // The only way a terminator could be mapped as legal is if it was safe to + // tail call. + bool IsTailCall = LastInstr->isTerminator(); + // Keep track of the benefit of outlining this candidate in its // OutlinedFunction. unsigned FnBenefit = TII.getOutliningBenefit(CandidateSequence.size(), - Occurrences.size() + Occurrences.size(), + IsTailCall ); assert(FnBenefit > 0 && "Function cannot be unbeneficial!"); @@ -1217,7 +1246,8 @@ FunctionList.size(), // Number of this function. Occurrences.size(), // Number of occurrences. CandidateSequence, // Sequence to outline. - FnBenefit // Instructions saved by outlining this function. + FnBenefit, // Instructions saved by outlining this function. + IsTailCall // Flag set if this function is to be tail called. ); // Save each of the occurrences of the candidate so we can outline them. @@ -1273,7 +1303,7 @@ // Insert the new function into the module. MF.insert(MF.begin(), &MBB); - TII.insertOutlinerPrologue(MBB, MF); + TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall); // Copy over the instructions for the function using the integer mappings in // its sequence. @@ -1288,7 +1318,7 @@ MBB.insert(MBB.end(), NewMI); } - TII.insertOutlinerEpilogue(MBB, MF); + TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall); return &MF; } @@ -1335,7 +1365,7 @@ const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert a call to the new function and erase the old sequence. - TII.insertOutlinedCall(M, *MBB, StartIt, *MF); + TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall); StartIt = Mapper.InstrList[C.StartIdx]; MBB->erase(StartIt, EndIt); @@ -1392,7 +1422,7 @@ std::vector FunctionList; unsigned MaxCandidateLen = - buildCandidateList(CandidateList, FunctionList, ST, *TII); + buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII); return outline(M, CandidateList, FunctionList, Mapper); Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -546,7 +546,8 @@ bool isTailCall(const MachineInstr &Inst) const override; unsigned getOutliningBenefit(size_t SequenceSize, - size_t Occurrences) const override; + size_t Occurrences, + bool CanBeTailCall) const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; @@ -554,16 +555,18 @@ getOutliningType(MachineInstr &MI) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF) const override; + MachineFunction &MF, + bool IsTailCall) const override; void insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF) const override; + MachineFunction &MF, + bool isTailCall) const override; MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF) const override; - + MachineFunction &MF, + bool IsTailCall) const override; protected: /// Commutes the operands in the given instruction by changing the operands /// order and/or changing the instruction's opcode and/or the immediate value Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -10387,13 +10387,21 @@ llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize, - size_t Occurrences) const { + size_t Occurrences, + bool CanBeTailCall) const { unsigned NotOutlinedSize = SequenceSize * Occurrences; - - // Sequence appears once in outlined function (Sequence.size()) - // One return instruction (+1) - // One call per occurrence (Occurrences) - unsigned OutlinedSize = (SequenceSize + 1) + Occurrences; + unsigned OutlinedSize; + + // Is it a tail call? + if (CanBeTailCall) { + // If yes, we don't have to include a return instruction-- it's already in + // our sequence. So we have one occurrence of the sequence + #Occurrences + // calls. + OutlinedSize = SequenceSize + Occurrences; + } else { + // If not, add one for the return instruction. + OutlinedSize = (SequenceSize + 1) + Occurrences; + } // Return the number of instructions saved by outlining this sequence. return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; @@ -10406,9 +10414,24 @@ X86GenInstrInfo::MachineOutlinerInstrType X86InstrInfo::getOutliningType(MachineInstr &MI) const { - // Don't outline returns or basic block terminators. - if (MI.isReturn() || MI.isTerminator()) + // Don't allow debug values to impact outlining type. + if (MI.isDebugValue() || MI.isIndirectDebugValue()) + return MachineOutlinerInstrType::Invisible; + + // Is this a tail call? If yes, we can outline as a tail call. + if (isTailCall(MI)) + return MachineOutlinerInstrType::Legal; + + // Is this the terminator of a basic block? + if (MI.isTerminator() || MI.isReturn()) { + + // Does its parent have any successors in its MachineFunction? + if (MI.getParent()->succ_empty()) + return MachineOutlinerInstrType::Legal; + + // It does, so we can't tail call it. return MachineOutlinerInstrType::Illegal; + } // Don't outline anything that modifies or reads from the stack pointer. // @@ -10421,47 +10444,65 @@ // catch it. if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || - MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) return MachineOutlinerInstrType::Illegal; + // Outlined calls change the instruction pointer, so don't read from it. if (MI.readsRegister(X86::RIP, &RI) || MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) || MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP)) return MachineOutlinerInstrType::Illegal; + // Positions can't safely be outlined. if (MI.isPosition()) return MachineOutlinerInstrType::Illegal; + // Make sure none of the operands of this instruction do anything tricky. for (const MachineOperand &MOP : MI.operands()) if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || MOP.isTargetIndex()) return MachineOutlinerInstrType::Illegal; - // Don't allow debug values to impact outlining type. - if (MI.isDebugValue() || MI.isIndirectDebugValue()) - return MachineOutlinerInstrType::Invisible; - return MachineOutlinerInstrType::Legal; } void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF) const { + MachineFunction &MF, + bool IsTailCall) const { + + // If we're a tail call, we already have a return, so don't do anything. + if (IsTailCall) + return; + // We're a normal call, so our sequence doesn't have a return instruction. + // Add it in. MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ)); MBB.insert(MBB.end(), retq); } void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF) const { + MachineFunction &MF, + bool IsTailCall) const { return; } MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF) const { - It = MBB.insert(It, + MachineFunction &MF, + bool IsTailCall) const { + // Is it a tail call? + if (IsTailCall) { + // Yes, just insert a JMP. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(X86::JMP_1)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + } else { + // No, insert a call. + It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32)) .addGlobalAddress(M.getNamedValue(MF.getName()))); + } + return It; } Index: test/CodeGen/X86/machine-outliner-tailcalls.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/machine-outliner-tailcalls.ll @@ -0,0 +1,35 @@ +; RUN: llc -enable-machine-outliner -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +@x = common local_unnamed_addr global i32 0, align 4 + +define i32 @foo0(i32) local_unnamed_addr #0 { +; CHECK-LABEL: _foo0: +; CHECK: jmp l_OUTLINED_FUNCTION_0 +; CHECK-NEXT: .cfi_endproc + store i32 0, i32* @x, align 4, !tbaa !2 + %2 = tail call i32 @ext(i32 1) #2 + ret i32 undef +} + +declare i32 @ext(i32) local_unnamed_addr #1 + +define i32 @foo1(i32) local_unnamed_addr #0 { +; CHECK-LABEL: _foo1: +; CHECK: jmp l_OUTLINED_FUNCTION_0 +; CHECK-NEXT: .cfi_endproc + store i32 0, i32* @x, align 4, !tbaa !2 + %2 = tail call i32 @ext(i32 1) #2 + ret i32 undef +} + +attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" } + +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} + +; CHECK-LABEL: l_OUTLINED_FUNCTION_0: +; CHECK: movl $0, (%rax) +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp _ext \ No newline at end of file Index: test/CodeGen/X86/machine-outliner.ll =================================================================== --- test/CodeGen/X86/machine-outliner.ll +++ test/CodeGen/X86/machine-outliner.ll @@ -15,7 +15,7 @@ %7 = icmp ne i32 %6, 0 br i1 %7, label %9, label %8 - ; CHECK: callq l_OUTLINED_FUNCTION_1 + ; CHECK: callq [[OFUNC1:l_OUTLINED_FUNCTION_[0-9]+]] ; CHECK: cmpl $0, -{{[0-9]+}}(%rbp) store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 @@ -30,7 +30,7 @@ %12 = icmp ne i32 %11, 0 br i1 %12, label %14, label %13 - ; CHECK: callq l_OUTLINED_FUNCTION_1 + ; CHECK: callq [[OFUNC1]] store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 @@ -79,13 +79,13 @@ store i32 0, i32* %1, align 4 store i32 0, i32* @x, align 4 - ; CHECK: callq l_OUTLINED_FUNCTION_0 + ; CHECK: callq [[OFUNC2:l_OUTLINED_FUNCTION_[0-9]+]] store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 store i32 1, i32* @x, align 4 - ; CHECK: callq l_OUTLINED_FUNCTION_0 + ; CHECK: callq [[OFUNC2]] store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 @@ -95,14 +95,14 @@ attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="true" } -; CHECK-LABEL: l_OUTLINED_FUNCTION_0: +; CHECK-LABEL: l_OUTLINED_FUNCTION_1: ; CHECK: movl $1, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: retq -; CHECK-LABEL: l_OUTLINED_FUNCTION_1: +; CHECK-LABEL: l_OUTLINED_FUNCTION_0: ; CHECK: movl $1, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp) ; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)