Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -961,6 +961,14 @@ let hasSideEffects = 1; let isReturn = 1; } +def PATCHABLE_TAIL_CALL : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins variable_ops); + let AsmString = "# XRay Tail Call Exit."; + let usesCustomInserter = 1; + let hasSideEffects = 1; + let isReturn = 1; +} // Generic opcodes used in GlobalISel. include "llvm/Target/GenericOpcodes.td" Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -1481,6 +1481,11 @@ return None; } + /// Determines whether |Inst| is a tail call instruction. + virtual bool isTailCall(const MachineInstr &Inst) const { + return false; + } + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; Index: include/llvm/Target/TargetOpcodes.def =================================================================== --- include/llvm/Target/TargetOpcodes.def +++ include/llvm/Target/TargetOpcodes.def @@ -155,6 +155,11 @@ /// instrumentation instructions at runtime. HANDLE_TARGET_OPCODE(PATCHABLE_RET) +/// Wraps a tail call instruction and its operands to enable adding nop sleds +/// either before or after the tail exit. We use this as a disambiguation from +/// PATCHABLE_RET which specifically only works for return instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. 
Index: lib/CodeGen/XRayInstrumentation.cpp =================================================================== --- lib/CodeGen/XRayInstrumentation.cpp +++ lib/CodeGen/XRayInstrumentation.cpp @@ -69,12 +69,19 @@ SmallVector<MachineInstr *, 4> Terminators; for (auto &MBB : MF) { for (auto &T : MBB.terminators()) { - // FIXME: Handle tail calls here too? + unsigned Opc = 0; if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { // Replace return instructions with: // PATCHABLE_RET <Opcode>, <Operand>... - auto MIB = BuildMI(MBB, T, T.getDebugLoc(), - TII->get(TargetOpcode::PATCHABLE_RET)) + Opc = TargetOpcode::PATCHABLE_RET; + } + if (TII->isTailCall(T)) { + // Treat the tail call as a return instruction, which has a + // different-looking sled than the normal return case. + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)) .addImm(T.getOpcode()); for (auto &MO : T.operands()) MIB.addOperand(MO); Index: lib/Target/Hexagon/HexagonInstrInfo.h =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.h +++ lib/Target/Hexagon/HexagonInstrInfo.h @@ -340,7 +340,10 @@ bool isSignExtendingLoad(const MachineInstr &MI) const; bool isSolo(const MachineInstr &MI) const; bool isSpillPredRegOp(const MachineInstr &MI) const; - bool isTailCall(const MachineInstr &MI) const; + + // Defined in TargetInstrInfo.h. 
+ bool isTailCall(const MachineInstr &MI) const override; + bool isTC1(const MachineInstr &MI) const; bool isTC2(const MachineInstr &MI) const; bool isTC2Early(const MachineInstr &MI) const; Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -541,6 +541,8 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableDirectMachineOperandTargetFlags() const override; + bool isTailCall(const MachineInstr &Inst) const override; + protected: /// Commutes the operands in the given instruction by changing the operands /// order and/or changing the instruction's opcode and/or the immediate value Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -8061,6 +8061,29 @@ return makeArrayRef(TargetFlags); } +bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case X86::TCRETURNdi: + case X86::TCRETURNmi: + case X86::TCRETURNri: + case X86::TCRETURNdi64: + case X86::TCRETURNmi64: + case X86::TCRETURNri64: + case X86::TAILJMPd: + case X86::TAILJMPm: + case X86::TAILJMPr: + case X86::TAILJMPd64: + case X86::TAILJMPm64: + case X86::TAILJMPr64: + case X86::TAILJMPd64_REX: + case X86::TAILJMPm64_REX: + case X86::TAILJMPr64_REX: + return true; + default: + return false; + } +} + namespace { /// Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. 
Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1093,6 +1093,39 @@ recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); } +void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) { + // Like PATCHABLE_RET, we have the actual instruction in the operands to this + // instruction so we lower that particular instruction and its operands. + // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how + // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to + // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual + // tail call much like how we have it in PATCHABLE_RET. + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitCodeAlignment(2); + OutStreamer->EmitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as + // an operand (computed as an offset from the jmp instruction). + // FIXME: Find another less hacky way to force the relative jump. + OutStreamer->EmitBytes("\xeb\x09"); + EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, SledKind::TAIL_CALL); + + unsigned OpCode = MI.getOperand(0).getImm(); + MCInst TC; + TC.setOpcode(OpCode); + + // Before emitting the instruction, add a comment to indicate that this is + // indeed a tail call. 
+ OutStreamer->AddComment("TAILCALL"); + for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) + if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) + TC.addOperand(MaybeOperand.getValue()); + OutStreamer->EmitInstruction(TC, getSubtargetInfo()); +} + void X86AsmPrinter::EmitXRayTable() { if (Sleds.empty()) return; @@ -1383,6 +1416,9 @@ case TargetOpcode::PATCHABLE_RET: return LowerPATCHABLE_RET(*MI, MCInstLowering); + case TargetOpcode::PATCHABLE_TAIL_CALL: + return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); + case X86::MORESTACK_RET: EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); return; Index: test/CodeGen/X86/xray-tail-call-sled.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/xray-tail-call-sled.ll @@ -0,0 +1,41 @@ +; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_0: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp0: + ret i32 0 +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_1: +; CHECK-NEXT: retq +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +} +; CHECK: .p2align 4, 0x90 +; CHECK-NEXT: .quad .Lxray_synthetic_0 +; CHECK-NEXT: .section xray_instr_map,{{.*}} +; CHECK-LABEL: Lxray_synthetic_0: +; CHECK: .quad .Lxray_sled_0 +; CHECK: .quad .Lxray_sled_1 + +define i32 @caller() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_2: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp1: +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_3: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp2: + %retval = tail call i32 @callee() +; CHECK: jmp callee # TAILCALL + ret i32 %retval +} +; CHECK: .p2align 4, 0x90 +; 
CHECK-NEXT: .quad .Lxray_synthetic_1 +; CHECK-LABEL: Lxray_synthetic_1: +; CHECK: .quad .Lxray_sled_2 +; CHECK: .quad .Lxray_sled_3