Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -142,6 +142,50 @@
   return IsLP64 ? X86::LEA64r : X86::LEA32r;
 }
 
+/// isReturnInstruction - Return true if \p Opc is one of the X86 return,
+/// tail-call return or EH return opcodes listed below.
+static bool isReturnInstruction(unsigned Opc) {
+  switch (Opc) {
+  default: return false;
+  case X86::RET:
+  case X86::RETL:
+  case X86::RETQ:
+  case X86::RETIL:
+  case X86::RETIQ:
+  case X86::TCRETURNdi:
+  case X86::TCRETURNri:
+  case X86::TCRETURNmi:
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64:
+    return true;
+  }
+}
+
+/// getReturnInstructionUses - Collect the registers read by the return
+/// instruction at \p MBBI, expanded through MCRegAliasIterator.
+static SmallSet<uint16_t, 8> getReturnInstructionUses(
+    MachineBasicBlock::iterator &MBBI,
+    const X86RegisterInfo *TRI) {
+  assert(isReturnInstruction(MBBI->getOpcode()) &&
+         "expected a return instruction");
+  SmallSet<uint16_t, 8> Uses;
+  for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MBBI->getOperand(i);
+    // Only register operands that are read (not defined) are of interest.
+    if (!MO.isReg() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+      Uses.insert(*AI);
+  }
+  return Uses;
+}
+
 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live
 /// when it reaches the "return" instruction. We can then pop a stack object
 /// to this register without worry about clobbering it.
@@ -157,38 +201,14 @@
   const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
 
   unsigned Opc = MBBI->getOpcode();
-  switch (Opc) {
-  default: return 0;
-  case X86::RET:
-  case X86::RETL:
-  case X86::RETQ:
-  case X86::RETIL:
-  case X86::RETIQ:
-  case X86::TCRETURNdi:
-  case X86::TCRETURNri:
-  case X86::TCRETURNmi:
-  case X86::TCRETURNdi64:
-  case X86::TCRETURNri64:
-  case X86::TCRETURNmi64:
-  case X86::EH_RETURN:
-  case X86::EH_RETURN64: {
-    SmallSet<uint16_t, 8> Uses;
-    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MBBI->getOperand(i);
-      if (!MO.isReg() || MO.isDef())
-        continue;
-      unsigned Reg = MO.getReg();
-      if (!Reg)
-        continue;
-      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-        Uses.insert(*AI);
-    }
+  if (!isReturnInstruction(Opc))
+    return 0;
 
-    for (auto CS : AvailableRegs)
-      if (!Uses.count(CS) && CS != X86::RIP)
-        return CS;
-  }
-  }
+  SmallSet<uint16_t, 8> Uses = getReturnInstructionUses(MBBI, TRI);
+  // Return the first tail-call GPR that the return instruction does not read.
+  for (auto CS : AvailableRegs)
+    if (!Uses.count(CS) && CS != X86::RIP)
+      return CS;
 
   return 0;
 }
@@ -405,6 +425,25 @@
     Offset -= PI->getOperand(2).getImm();
     MBB.erase(PI);
     if (!doMergeWithPrevious) MBBI = NI;
+  } else if (doMergeWithPrevious && MBBI != MBB.end() &&
+             (Opc == X86::POP64r || Opc == X86::POP32r) &&
+             PI->getOperand(0).getReg() != X86::RIP &&
+             isReturnInstruction(MBBI->getOpcode())) {
+    // Fold the POPs that directly precede the return into the returned
+    // offset, for as long as the popped register is not read by the return.
+    // NOTE(review): erasing a POP also drops the value it would load;
+    // confirm callers only reach this for POPs used as stack adjustments.
+    SmallSet<uint16_t, 8> Uses = getReturnInstructionUses(MBBI, TRI);
+    while ((Opc == X86::POP64r || Opc == X86::POP32r) &&
+           PI->getOperand(0).getReg() != X86::RIP &&
+           !Uses.count(PI->getOperand(0).getReg())) {
+      Offset += (Opc == X86::POP64r ? 8 : 4);
+      MBB.erase(PI);
+      if (MBBI == MBB.begin())
+        break;
+      PI = std::prev(MBBI);
+      Opc = PI->getOpcode();
+    }
   }
 
   return Offset;
Index: test/CodeGen/X86/hipe-cc.ll
===================================================================
--- test/CodeGen/X86/hipe-cc.ll
+++ test/CodeGen/X86/hipe-cc.ll
@@ -73,5 +73,17 @@
   ret void
 }
 
+define cc 11 { i32, i32, i32 } @tailcaller(i32, i32) nounwind {
+  ; CHECK: movl $15, %eax
+  ; CHECK-NEXT: movl $31, %edx
+  ; CHECK-NEXT: movl $47, %ecx
+  ; CHECK-NEXT: popl %edi
+  ; CHECK-NEXT: jmp tailcallee
+  %3 = tail call cc11 { i32, i32, i32 } @tailcallee(i32 %0, i32 %1, i32 15,
+     i32 31, i32 47, i32 63) nounwind
+  ret { i32, i32, i32 } %3
+}
+
 @clos = external constant i32
 declare cc 11 void @bar(i32, i32, i32, i32, i32)
+declare cc 11 { i32, i32, i32 } @tailcallee(i32, i32, i32, i32, i32, i32)
Index: test/CodeGen/X86/hipe-cc64.ll
===================================================================
--- test/CodeGen/X86/hipe-cc64.ll
+++ test/CodeGen/X86/hipe-cc64.ll
@@ -83,5 +83,18 @@
   ret void
 }
 
+define cc 11 { i64, i64, i64 } @tailcaller(i64, i64) #0 {
+  ; CHECK: movl $15, %esi
+  ; CHECK-NEXT: movl $31, %edx
+  ; CHECK-NEXT: movl $47, %ecx
+  ; CHECK-NEXT: movl $63, %r8d
+  ; CHECK-NEXT: popq %rax
+  ; CHECK-NEXT: jmp tailcallee # TAILCALL
+  %3 = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %0, i64 %1, i64 15,
+     i64 31, i64 47, i64 63, i64 79) #1
+  ret { i64, i64, i64 } %3
+}
+
 @clos = external constant i64
 declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)
+declare cc 11 { i64, i64, i64 } @tailcallee(i64, i64, i64, i64, i64, i64, i64)