Index: compiler-rt/lib/xray/xray_trampoline_x86_64.S
===================================================================
--- compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -20,7 +20,7 @@
 
 .macro SAVE_REGISTERS
   subq $192, %rsp
-  CFI_DEF_CFA_OFFSET(200)
+  CFI_ADJUST_CFA_OFFSET(192)
   // At this point, the stack pointer should be aligned to an 8-byte boundary,
   // because any call instructions that come after this will add another 8
   // bytes and therefore align it to 16-bytes.
@@ -60,7 +60,7 @@
   movq 8(%rsp), %r8
   movq 0(%rsp), %r9
   addq $192, %rsp
-  CFI_DEF_CFA_OFFSET(8)
+  CFI_ADJUST_CFA_OFFSET(-192)
 .endm
 
 .macro ALIGNED_CALL_RAX
@@ -71,10 +71,13 @@
   // pointer, we can always look -8 bytes from the current position to get
   // either of the values we've stashed in the first place.
   pushq %rsp
+  CFI_ADJUST_CFA_OFFSET(8)
   pushq (%rsp)
+  CFI_ADJUST_CFA_OFFSET(8)
   andq $-0x10, %rsp
   callq *%rax
   movq 8(%rsp), %rsp
+  CFI_ADJUST_CFA_OFFSET(-16)
 .endm
 
 .text
@@ -100,7 +103,7 @@
   testq %rax, %rax
   je .Ltmp0
 
-  // The patched function prolog puts its xray_instr_map index into %r10d.
+  // The patched function prologue puts its xray_instr_map index into %r10d.
   movl %r10d, %edi
   xor %esi,%esi
   ALIGNED_CALL_RAX
@@ -122,7 +125,7 @@
   // function is only jumped into, we only preserve the registers for
   // returning.
   subq $56, %rsp
-  CFI_DEF_CFA_OFFSET(64)
+  CFI_ADJUST_CFA_OFFSET(56)
   movq %rbp, 48(%rsp)
   movupd %xmm0, 32(%rsp)
   movupd %xmm1, 16(%rsp)
@@ -144,7 +147,7 @@
   movq 8(%rsp), %rax
   movq 0(%rsp), %rdx
   addq $56, %rsp
-  CFI_DEF_CFA_OFFSET(8)
+  CFI_ADJUST_CFA_OFFSET(-56)
   retq
   ASM_SIZE(__xray_FunctionExit)
   CFI_ENDPROC
@@ -219,6 +222,14 @@
   CFI_STARTPROC
   SAVE_REGISTERS
 
+  // Because calls to __xray_CustomEvent can occur in the middle of a function
+  // which may already be using scratch registers, we preserve more information
+  // in this trampoline than when we are handling entry/exit events.
+  subq $16, %rsp
+  CFI_ADJUST_CFA_OFFSET(16)
+  movq %r10, 8(%rsp)
+  movq %r11, 0(%rsp)
+
   // We take two arguments to this trampoline, which should be in rdi and rsi
   // already. We also make sure that we stash %rax because we use that register
   // to call the logging handler.
@@ -229,6 +240,11 @@
   ALIGNED_CALL_RAX
 
 .LcustomEventCleanup:
+
+  movq 8(%rsp), %r10
+  movq 0(%rsp), %r11
+  addq $16, %rsp
+  CFI_ADJUST_CFA_OFFSET(-16)
   RESTORE_REGISTERS
   retq
   ASM_SIZE(__xray_CustomEvent)
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -880,7 +880,7 @@
 // Custom event logging for x-ray.
 // Takes a pointer to a string and the length of the string.
 def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
-                                     [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+                                     [NoCapture<0>, ReadOnly<0>, IntrWriteMem, IntrHasSideEffects]>;
 
 //===----------------------------------------------------------------------===//
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
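
A note on the two CFI macros, since the distinction is the heart of this change: in compiler-rt's sanitizer_asm.h, CFI_DEF_CFA_OFFSET expands to the GNU as directive .cfi_def_cfa_offset, which sets the CFA (canonical frame address) offset from %rsp to an absolute value, while CFI_ADJUST_CFA_OFFSET expands to .cfi_adjust_cfa_offset, which adds a signed delta to the current offset. The absolute form is only correct when the macro knows exactly how far %rsp already is from the CFA; the relative form stays correct regardless of what pushes preceded it, which is what lets SAVE_REGISTERS and the new scratch-register spill compose inside __xray_CustomEvent. The standalone sketch below (a hypothetical function, not part of the patch) shows relative directives tracking the CFA through nested adjustments:

  # Minimal sketch of relative CFA tracking; "example_fn" is hypothetical.
  .text
  .globl example_fn
  .type example_fn, @function
  example_fn:
  .cfi_startproc              # after the call: CFA = %rsp + 8
  pushq %r10
  .cfi_adjust_cfa_offset 8    # CFA = %rsp + 16
  subq $192, %rsp
  .cfi_adjust_cfa_offset 192  # CFA = %rsp + 208; an absolute
                              # .cfi_def_cfa_offset 200 here would be wrong,
                              # since it ignores the earlier push
  addq $192, %rsp
  .cfi_adjust_cfa_offset -192 # CFA = %rsp + 16
  popq %r10
  .cfi_adjust_cfa_offset -8   # CFA = %rsp + 8
  retq
  .cfi_endproc
  .size example_fn, .-example_fn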
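
On the Intrinsics.td side, IntrHasSideEffects marks the intrinsic as having side effects not captured by its other attributes. The intent, presumably, is that calls to llvm.xray.customevent cannot be deleted as dead just because they produce no value in the IR: the logging event must fire exactly where the user placed it.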