Index: compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
===================================================================
--- compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
+++ compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S
@@ -19,47 +19,56 @@
 .macro SAVE_REGISTERS
-  subq $192, %rsp
-  CFI_DEF_CFA_OFFSET(200)
-  // At this point, the stack pointer should be aligned to an 8-byte boundary,
-  // because any call instructions that come after this will add another 8
-  // bytes and therefore align it to 16-bytes.
-  movq %rbp, 184(%rsp)
-  movupd %xmm0, 168(%rsp)
-  movupd %xmm1, 152(%rsp)
-  movupd %xmm2, 136(%rsp)
-  movupd %xmm3, 120(%rsp)
-  movupd %xmm4, 104(%rsp)
-  movupd %xmm5, 88(%rsp)
-  movupd %xmm6, 72(%rsp)
-  movupd %xmm7, 56(%rsp)
-  movq %rdi, 48(%rsp)
-  movq %rax, 40(%rsp)
-  movq %rdx, 32(%rsp)
-  movq %rsi, 24(%rsp)
-  movq %rcx, 16(%rsp)
-  movq %r8, 8(%rsp)
-  movq %r9, 0(%rsp)
+  subq $240, %rsp
+  CFI_DEF_CFA_OFFSET(248)
+  movq %rbp, 232(%rsp)
+  movupd %xmm0, 216(%rsp)
+  movupd %xmm1, 200(%rsp)
+  movupd %xmm2, 184(%rsp)
+  movupd %xmm3, 168(%rsp)
+  movupd %xmm4, 152(%rsp)
+  movupd %xmm5, 136(%rsp)
+  movupd %xmm6, 120(%rsp)
+  movupd %xmm7, 104(%rsp)
+  movq %rdi, 96(%rsp)
+  movq %rax, 88(%rsp)
+  movq %rdx, 80(%rsp)
+  movq %rsi, 72(%rsp)
+  movq %rcx, 64(%rsp)
+  movq %r8, 56(%rsp)
+  movq %r9, 48(%rsp)
+  movq %r10, 40(%rsp)
+  movq %r11, 32(%rsp)
+  movq %r12, 24(%rsp)
+  movq %r13, 16(%rsp)
+  movq %r14, 8(%rsp)
+  movq %r15, 0(%rsp)
 .endm
 
 .macro RESTORE_REGISTERS
-  movq 184(%rsp), %rbp
-  movupd 168(%rsp), %xmm0
-  movupd 152(%rsp), %xmm1
-  movupd 136(%rsp), %xmm2
-  movupd 120(%rsp), %xmm3
-  movupd 104(%rsp), %xmm4
-  movupd 88(%rsp), %xmm5
-  movupd 72(%rsp) , %xmm6
-  movupd 56(%rsp) , %xmm7
-  movq 48(%rsp), %rdi
-  movq 40(%rsp), %rax
-  movq 32(%rsp), %rdx
-  movq 24(%rsp), %rsi
-  movq 16(%rsp), %rcx
-  movq 8(%rsp), %r8
-  movq 0(%rsp), %r9
-  addq $192, %rsp
+  movq 232(%rsp), %rbp
+  movupd 216(%rsp), %xmm0
+  movupd 200(%rsp), %xmm1
+  movupd 184(%rsp), %xmm2
+  movupd 168(%rsp), %xmm3
+  movupd 152(%rsp), %xmm4
+  movupd 136(%rsp), %xmm5
+  movupd 120(%rsp), %xmm6
+  movupd 104(%rsp), %xmm7
+  movq 96(%rsp), %rdi
+  movq 88(%rsp), %rax
+  movq 80(%rsp), %rdx
+  movq 72(%rsp), %rsi
+  movq 64(%rsp), %rcx
+  movq 56(%rsp), %r8
+  movq 48(%rsp), %r9
+  movq 40(%rsp), %r10
+  movq 32(%rsp), %r11
+  movq 24(%rsp), %r12
+  movq 16(%rsp), %r13
+  movq 8(%rsp), %r14
+  movq 0(%rsp), %r15
+  addq $240, %rsp
   CFI_DEF_CFA_OFFSET(8)
 .endm
@@ -100,7 +109,7 @@
   testq %rax, %rax
   je .Ltmp0
 
-  // The patched function prolog puts its xray_instr_map index into %r10d.
+  // The patched function prologue puts its xray_instr_map index into %r10d.
   movl %r10d, %edi
   xor %esi,%esi
   ALIGNED_CALL_RAX
@@ -220,8 +229,7 @@
   SAVE_REGISTERS
 
   // We take two arguments to this trampoline, which should be in rdi and rsi
-  // already. We also make sure that we stash %rax because we use that register
-  // to call the logging handler.
+  // already.
   movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
   testq %rax,%rax
   je .LcustomEventCleanup
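For context on the wider spill area: the custom event sled sits in the middle of an already-compiled function, so the trampoline now conservatively preserves %r10-%r15 as well, presumably because any of those registers may hold live values at the point the sled fires. A minimal sketch of a call site that reaches this trampoline -- hypothetical names, assuming a Clang build with -fxray-instrument:

    // Sketch only. The __xray_customevent builtin lowers to
    // PATCHABLE_EVENT_CALL, which ends up calling __xray_CustomEvent with
    // the buffer in %rdi and its size in %rsi; every other register must
    // survive the call unchanged.
    #include <cstddef>

    [[clang::xray_always_instrument]]
    void RecordPhase(const char *Phase, std::size_t Len) {
      __xray_customevent(Phase, Len); // a no-op NOP sled until patched
    }
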
Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -2532,6 +2532,11 @@
   /// sequence of memory operands that is recognized by PrologEpilogInserter.
   MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay custom event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
+                                         MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG
Index: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -874,6 +874,7 @@
                     TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
   for (auto &MO : Ops)
     MIB.add(MO);
+  // Insert the PATCHABLE_EVENT_CALL instruction; it gets lowered later by
+  // the target's custom inserter.
   return true;
 }
 
Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
@@ -985,6 +985,21 @@
   return MBB;
 }
 
+MachineBasicBlock *
+TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
+                                        MachineBasicBlock *MBB) const {
+  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
+         "Called emitXRayCustomEvent on the wrong MI!");
+  auto &MF = *MI.getMF();
+  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+    MIB.add(MI.getOperand(OpIdx));
+
+  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 // This function is in TargetLowering because it uses RegClassForVT which would
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -27878,8 +27878,7 @@
     return emitPatchPoint(MI, BB);
 
   case TargetOpcode::PATCHABLE_EVENT_CALL:
-    // Do nothing here, handle in xray instrumentation pass.
-    return BB;
+    return emitXRayCustomEvent(MI, BB);
 
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
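On the runtime side, the _ZN6__xray22XRayPatchedCustomEventE global tested by the trampoline is __xray::XRayPatchedCustomEvent, which compiler-rt sets through its public interface. A minimal sketch of the consuming side, assuming compiler-rt's xray_interface.h is on the include path (install location may vary):

    #include <cstdio>
    #include "xray/xray_interface.h" // from compiler-rt

    // Invoked through the __xray_CustomEvent trampoline once the sleds are
    // patched; it receives the exact buffer and size passed to the builtin.
    static void OnCustomEvent(void *Buf, std::size_t Size) {
      std::fprintf(stderr, "custom event: %.*s\n", static_cast<int>(Size),
                   static_cast<const char *>(Buf));
    }

    int main() {
      __xray_set_customevent_handler(OnCustomEvent);
      __xray_patch(); // turn the NOP sleds into calls to the trampolines
      // ... run instrumented code that calls __xray_customevent(...) ...
      __xray_remove_customevent_handler();
      return 0;
    }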