Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -1087,6 +1087,23 @@ /// terminator instruction that has not been predicated. virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const; + /// Returns true if MI is an unconditional tail call. + virtual bool isUnconditionalTailCall(const MachineInstr &MI) const { + return false; + } + + /// Returns true if the tail call can be made conditional on BranchCond. + virtual bool + canMakeTailCallConditional(SmallVectorImpl &Cond, + const MachineInstr &TailCall) const { + return false; + } + + /// Replace the conditional branch in MBB with a conditional tail call. + virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB, + SmallVectorImpl &Cond, + const MachineInstr &TailCall) const {} + /// Convert the instruction into a predicated instruction. /// It returns true if the operation was successful. virtual bool PredicateInstruction(MachineInstr &MI, Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -49,6 +49,7 @@ STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); STATISTIC(NumHoist , "Number of times common instructions are hoisted"); +STATISTIC(NumTailCalls, "Number of tail calls optimized"); static cl::opt FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -1448,6 +1449,35 @@ } } + if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1) { + MachineInstr &TailCall = *MBB->getFirstNonDebugInstr(); + if (TII->isUnconditionalTailCall(TailCall)) { + MachineBasicBlock *Pred = *MBB->pred_begin(); + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector PredCond; + bool PredAnalyzable = + !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); + + if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) { + // The predecessor has a conditional branch to this block which consists + // of only a tail call. Try to fold the tail call into the conditional + // branch. + if (TII->canMakeTailCallConditional(PredCond, TailCall)) { + TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall); + ++NumTailCalls; + Pred->removeSuccessor(MBB); + MadeChange = true; + return MadeChange; + } + } + // If the predecessor is falling through to this block, we could reverse + // the branch condition and fold the tail call into that. However, after + // that we might have to re-arrange the CFG to fall through to the other + // block and there is a high risk of regressing code size rather than + // improving it. + } + } + // Analyze the branch in the current block. MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector CurCond; Index: lib/Target/X86/X86ExpandPseudo.cpp =================================================================== --- lib/Target/X86/X86ExpandPseudo.cpp +++ lib/Target/X86/X86ExpandPseudo.cpp @@ -77,6 +77,7 @@ default: return false; case X86::TCRETURNdi: + case X86::TCRETURNdicc: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: @@ -94,9 +95,13 @@ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); // Incoporate the retaddr area. - Offset = StackAdj-MaxTCDelta; + Offset = StackAdj - MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); + if (Opcode == X86::TCRETURNdicc) { + assert(Offset == 0 && "Conditional tail call cannot adjust the stack."); + } + if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); @@ -105,19 +110,49 @@ // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); - if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) { - unsigned Op = (Opcode == X86::TCRETURNdi) - ? X86::TAILJMPd - : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64); + if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || + Opcode == X86::TCRETURNdi64) { + unsigned Op; + switch (Opcode) { + case X86::TCRETURNdi: + Op = X86::TAILJMPd; + break; + case X86::TCRETURNdicc: + switch(static_cast(MBBI->getOperand(2).getImm())) { + case X86::COND_E: Op = X86::TAILJMPd_E; break; + case X86::COND_NE: Op = X86::TAILJMPd_NE; break; + case X86::COND_L: Op = X86::TAILJMPd_L; break; + case X86::COND_LE: Op = X86::TAILJMPd_LE; break; + case X86::COND_G: Op = X86::TAILJMPd_G; break; + case X86::COND_GE: Op = X86::TAILJMPd_GE; break; + case X86::COND_B: Op = X86::TAILJMPd_B; break; + case X86::COND_BE: Op = X86::TAILJMPd_BE; break; + case X86::COND_A: Op = X86::TAILJMPd_A; break; + case X86::COND_AE: Op = X86::TAILJMPd_AE; break; + case X86::COND_S: Op = X86::TAILJMPd_S; break; + case X86::COND_NS: Op = X86::TAILJMPd_NS; break; + case X86::COND_P: Op = X86::TAILJMPd_P; break; + case X86::COND_NP: Op = X86::TAILJMPd_NP; break; + case X86::COND_O: Op = X86::TAILJMPd_O; break; + case X86::COND_NO: Op = X86::TAILJMPd_NO; break; + default: + assert(0 && "Unexpected tail call condition code."); + } + break; + default: + Op = IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64; + break; + } MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); - if (JumpTarget.isGlobal()) + if (JumpTarget.isGlobal()) { MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); - else { + } else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } + } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm Index: lib/Target/X86/X86InstrControl.td =================================================================== --- lib/Target/X86/X86InstrControl.td +++ lib/Target/X86/X86InstrControl.td @@ -245,6 +245,8 @@ let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), []>; + def TCRETURNdicc : PseudoI<(outs), + (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; let mayLoad = 1 in @@ -257,6 +259,24 @@ (ins i32imm_pcrel:$dst), "jmp\t$dst", [], IIC_JMP_REL>; + + def TAILJMPd_O : Ii32PCRel<0x80, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jo\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_NO : Ii32PCRel<0x81, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jno\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_B : Ii32PCRel<0x82, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jb\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_AE : Ii32PCRel<0x83, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jae\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_E : Ii32PCRel<0x84, RawFrm, (outs), (ins i32imm_pcrel:$dst), "je\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_NE : Ii32PCRel<0x85, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jne\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_BE : Ii32PCRel<0x86, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jbe\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_A : Ii32PCRel<0x87, RawFrm, (outs), (ins i32imm_pcrel:$dst), "ja\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_S : Ii32PCRel<0x88, RawFrm, (outs), (ins i32imm_pcrel:$dst), "js\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_NS : Ii32PCRel<0x89, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jns\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_P : Ii32PCRel<0x8A, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jp\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_NP : Ii32PCRel<0x8B, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jnp\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_L : Ii32PCRel<0x8C, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jl\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_GE : Ii32PCRel<0x8D, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jge\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_LE : Ii32PCRel<0x8E, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jle\t$dst", [], IIC_JMP_REL>; + def TAILJMPd_G : Ii32PCRel<0x8F, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jg\t$dst", [], IIC_JMP_REL>; + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -316,6 +316,13 @@ // Branch analysis. bool isUnpredicatedTerminator(const MachineInstr &MI) const override; + bool isUnconditionalTailCall(const MachineInstr &MI) const override; + bool canMakeTailCallConditional(SmallVectorImpl &Cond, + const MachineInstr &TailCall) const override; + void replaceBranchWithTailCall(MachineBasicBlock &MBB, + SmallVectorImpl &Cond, + const MachineInstr &TailCall) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -3943,6 +3943,92 @@ return !isPredicated(MI); } +bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + return true; + default: + return false; + } +} + +bool X86InstrInfo::canMakeTailCallConditional( + SmallVectorImpl &BranchCond, + const MachineInstr &TailCall) const { + switch (BranchCond[0].getImm()) { + default: + // Can't make a conditional tail call with this condition. + return false; + case X86::COND_E: + case X86::COND_NE: + case X86::COND_L: + case X86::COND_LE: + case X86::COND_G: + case X86::COND_GE: + case X86::COND_B: + case X86::COND_BE: + case X86::COND_A: + case X86::COND_AE: + case X86::COND_S: + case X86::COND_NS: + case X86::COND_P: + case X86::COND_NP: + case X86::COND_O: + case X86::COND_NO: + break; + } + + if (TailCall.getOpcode() != X86::TCRETURNdi) { + // Only direct calls can be done with a conditional branch. + return false; + } + + const X86MachineFunctionInfo *X86FI = + TailCall.getParent()->getParent()->getInfo(); + if (X86FI->getTCReturnAddrDelta() != 0 || + TailCall.getOperand(1).getImm() != 0) { + // A conditional tail call cannot do any stack adjustment. + return false; + } + + return true; +} + +void X86InstrInfo::replaceBranchWithTailCall( + MachineBasicBlock &MBB, SmallVectorImpl &BranchCond, + const MachineInstr &TailCall) const { + assert(canMakeTailCallConditional(BranchCond, TailCall)); + + MachineBasicBlock::iterator I = MBB.end(); + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isBranch()) + assert(0 && "Can't find the branch to replace!"); + + X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); + assert(BranchCond.size() == 1); + if (CC != BranchCond[0].getImm()) + continue; + + break; + } + + auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(X86::TCRETURNdicc)); + MIB->addOperand(TailCall.getOperand(0)); // Destination. + MIB.addImm(0); // Stack offset (not used). + MIB->addOperand(BranchCond[0]); // Condition. + MIB->addOperand(TailCall.getOperand(2)); // Regmask. + + I->eraseFromParent(); +} + // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may // not be a fallthrough MBB now due to layout changes). Return nullptr if the // fallthrough MBB cannot be identified. Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -499,18 +499,30 @@ break; } - // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. - case X86::TAILJMPr: + // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instructions. + { unsigned Opcode; + case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; case X86::TAILJMPd: - case X86::TAILJMPd64: { - unsigned Opcode; - switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::TAILJMPr: Opcode = X86::JMP32r; break; - case X86::TAILJMPd: - case X86::TAILJMPd64: Opcode = X86::JMP_1; break; - } - + case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; + + case X86::TAILJMPd_O: Opcode = X86::JO_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_NO: Opcode = X86::JNO_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_B: Opcode = X86::JB_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_AE: Opcode = X86::JAE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_E: Opcode = X86::JE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_NE: Opcode = X86::JNE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_BE: Opcode = X86::JBE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_A: Opcode = X86::JA_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_S: Opcode = X86::JS_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_NS: Opcode = X86::JNS_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_P: Opcode = X86::JP_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_NP: Opcode = X86::JNP_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_L: Opcode = X86::JL_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_GE: Opcode = X86::JGE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_LE: Opcode = X86::JLE_1; goto SetTailJmpOpcode; + case X86::TAILJMPd_G: Opcode = X86::JG_1; goto SetTailJmpOpcode; + + SetTailJmpOpcode: MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); OutMI.setOpcode(Opcode); @@ -1273,6 +1285,22 @@ case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: + case X86::TAILJMPd_O: + case X86::TAILJMPd_NO: + case X86::TAILJMPd_B: + case X86::TAILJMPd_AE: + case X86::TAILJMPd_E: + case X86::TAILJMPd_NE: + case X86::TAILJMPd_BE: + case X86::TAILJMPd_A: + case X86::TAILJMPd_S: + case X86::TAILJMPd_NS: + case X86::TAILJMPd_P: + case X86::TAILJMPd_NP: + case X86::TAILJMPd_L: + case X86::TAILJMPd_GE: + case X86::TAILJMPd_LE: + case X86::TAILJMPd_G: case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64: Index: test/CodeGen/X86/atom-pad-short-functions.ll =================================================================== --- test/CodeGen/X86/atom-pad-short-functions.ll +++ test/CodeGen/X86/atom-pad-short-functions.ll @@ -65,11 +65,10 @@ define void @test_call_others(i32 %x) nounwind { ; CHECK: test_call_others -; CHECK: je %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %if.end, label %true.case -; CHECK: jmp external_function +; CHECK: jne external_function true.case: tail call void bitcast (void (...)* @external_function to void ()*)() nounwind br label %if.end Index: test/CodeGen/X86/conditional-tailcall.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/conditional-tailcall.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -show-mc-encoding | FileCheck %s + +declare void @foo() +declare void @bar() +define void @f(i32 %x, i32 %y) { +entry: + %p = icmp eq i32 %x, %y + br i1 %p, label %bb1, label %bb2 +bb1: + tail call void @foo() + ret void +bb2: + tail call void @bar() + ret void +} + +; CHECK-LABEL: f: +; CHECK: cmp +; CHECK: jne bar +; CHECK: encoding: [0x75,A] + +; CHECK: jmp foo Index: test/CodeGen/X86/or-branch.ll =================================================================== --- test/CodeGen/X86/or-branch.ll +++ test/CodeGen/X86/or-branch.ll @@ -10,7 +10,7 @@ ; JUMP1-DAG: sete ; JUMP1-DAG: setl ; JUMP1: orb -; JUMP1: jne +; JUMP1: je entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 @@ -33,7 +33,7 @@ ; CHECK-DAG: sete ; CHECK-DAG: setl ; CHECK: orb -; CHECK: jne +; CHECK: je entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 Index: test/CodeGen/X86/sibcall.ll =================================================================== --- test/CodeGen/X86/sibcall.ll +++ test/CodeGen/X86/sibcall.ll @@ -98,8 +98,8 @@ ; 64: jmp {{_?}}bar ; X32ABI-LABEL: t6: +; X32ABI: jg {{_?}}bar ; X32ABI: jmp {{_?}}t6 -; X32ABI: jmp {{_?}}bar %0 = icmp slt i32 %x, 10 br i1 %0, label %bb, label %bb1 @@ -183,11 +183,12 @@ ; eliminated currently. ; 32-LABEL: t11: +; 32: movl +; 32: testl ; 32-NOT: subl ${{[0-9]+}}, %esp -; 32: je ; 32-NOT: movl ; 32-NOT: addl ${{[0-9]+}}, %esp -; 32: jmp {{_?}}foo5 +; 32: jne {{_?}}foo5 ; 64-LABEL: t11: ; 64-NOT: subq ${{[0-9]+}}, %rsp @@ -197,7 +198,7 @@ ; X32ABI-LABEL: t11: ; X32ABI-NOT: subl ${{[0-9]+}}, %esp ; X32ABI-NOT: addl ${{[0-9]+}}, %esp -; X32ABI: jmp {{_?}}foo5 +; X32ABI: jne {{_?}}foo5 entry: %0 = icmp eq i32 %x, 0 br i1 %0, label %bb6, label %bb @@ -218,7 +219,7 @@ ; 32-LABEL: t12: ; 32-NOT: subl ${{[0-9]+}}, %esp ; 32-NOT: addl ${{[0-9]+}}, %esp -; 32: jmp {{_?}}foo6 +; 32: jne {{_?}}foo6 ; 64-LABEL: t12: ; 64-NOT: subq ${{[0-9]+}}, %rsp @@ -228,7 +229,7 @@ ; X32ABI-LABEL: t12: ; X32ABI-NOT: subl ${{[0-9]+}}, %esp ; X32ABI-NOT: addl ${{[0-9]+}}, %esp -; X32ABI: jmp {{_?}}foo6 +; X32ABI: jne {{_?}}foo6 entry: %0 = icmp eq i32 %x, 0 br i1 %0, label %bb2, label %bb Index: test/CodeGen/X86/xor-icmp.ll =================================================================== --- test/CodeGen/X86/xor-icmp.ll +++ test/CodeGen/X86/xor-icmp.ll @@ -9,7 +9,7 @@ ; X32-NOT: andb ; X32-NOT: shrb ; X32: testb $64 -; X32: je +; X32: jne ; X64-LABEL: t: ; X64-NOT: setne @@ -43,7 +43,7 @@ ; X32: cmpl ; X32: sete ; X32-NOT: xor -; X32: je +; X32: jne ; X64-LABEL: t2: ; X64: testl