Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -197,6 +197,7 @@ FUNCTION_ENTER = 0, FUNCTION_EXIT = 1, TAIL_CALL = 2, + CUSTOM_LOG = 3, }; // The table will contain these structs that point to the sled, the function Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -759,6 +759,12 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], [IntrReadMem, IntrArgMemOnly]>; +// XRay intrinsics. NOTE(review): this intrinsic returns no value and is marked IntrReadMem; confirm optimizers will not treat such a call as removable. +//===----------------------------------------------------------------------===// +def int_xray_customlog : Intrinsic<[], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +//===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -998,6 +998,16 @@ let hasSideEffects = 1; let isReturn = 1; } +def PATCHABLE_LOG_CALL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins unknown:$entry); + let AsmString = "# XRay Custom Log."; + let usesCustomInserter = 1; + let isCall = 1; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 1; +} // Generic opcodes used in GlobalISel. 
include "llvm/Target/GenericOpcodes.td" Index: include/llvm/Target/TargetOpcodes.def =================================================================== --- include/llvm/Target/TargetOpcodes.def +++ include/llvm/Target/TargetOpcodes.def @@ -177,6 +177,10 @@ /// PATCHABLE_RET which specifically only works for return instructions. HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) +/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be +/// patched to insert instrumentation instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_LOG_CALL) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5729,7 +5729,28 @@ setValue(&I, N); return nullptr; } - + case Intrinsic::xray_customlog: { + SDLoc dl = getCurSDLoc(); + // FIXME -- I've been playing around with adding multiple ops for chain and glue, + // but that might not actually be necessary in this case? 
Clean this up not to use + // a vector once we finalize how this works. + SmallVector<SDValue, 8> Ops; + Value *LogEntry = I.getArgOperand(0); + SDValue LogEntryVal = getValue(LogEntry); + SDValue newLogEntryVal; + newLogEntryVal = LogEntryVal; + + // PATCHABLE_LOG_CALL declares no outputs, so the node yields only a chain (plus glue). + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(newLogEntryVal); + Ops.push_back(Chain); + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_LOG_CALL, dl, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); // Thread the node into the chain so it stays reachable from the DAG root. + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; Index: lib/Target/X86/X86AsmPrinter.h =================================================================== --- lib/Target/X86/X86AsmPrinter.h +++ lib/Target/X86/X86AsmPrinter.h @@ -91,6 +91,7 @@ X86MCInstLower &MCIL); void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerPATCHABLE_LOG_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); // Helper function that emits the XRay sleds we've collected for a particular // function. 
Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -19216,6 +19216,16 @@ Reg = RegInfo->getPtrSizedFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } + + /* + case Intrinsic::xray_customlog: { + SDValue EntryOp = Op.getOperand(0); + const Constant *C = getTargetConstantFromNode(EntryOp); + cast(Op.getNode())->getZExtValue(); + MachineFunction &MF = DAG.getMachineFunction(); + return nullptr; + } + */ } } @@ -25614,6 +25624,10 @@ case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + + case TargetOpcode::PATCHABLE_LOG_CALL: + // Do nothing here, handle in xray instrumentation pass. + return BB; case X86::LCMPXCHG8B: { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1021,6 +1021,46 @@ getSubtargetInfo()); } +void X86AsmPrinter::LowerPATCHABLE_LOG_CALL(const MachineInstr &MI, + X86MCInstLower &MCIL) { + // We want to emit the following pattern: + // + // .p2align 1, ... + // .Lxray_sled_N: + // jmp .tmpN # 2 bytes + // # 4 bytes of noops + // push # 5 bytes + // # 5 bytes of noops + // .tmpN + // + // We need the 9 bytes because at runtime, we'd be patching over the full 16 + // bytes with the following pattern: + // + // mov %r10, // 6 bytes + // push // 5 bytes + // call // 5 bytes + // + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitCodeAlignment(2); + OutStreamer->EmitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as + // an operand (computed as an offset from the jmp instruction). 
+ // FIXME: Find another less hacky way to force the relative jump. NOTE(review): confirm the hard-coded displacement of 9 matches the bytes emitted before the target label (4 nop bytes, the push, 5 nop bytes). + OutStreamer->EmitBytes("\xeb\x09"); + EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); + // Push the register containing the log entry onto the stack. + MCInst Push; + Push.setOpcode(X86::PUSH64r); + Push.addOperand(MCIL.LowerMachineOperand(&MI, MI.getOperand(0)).getValue()); + OutStreamer->EmitInstruction(Push, getSubtargetInfo()); + + EmitNops(*OutStreamer, 5, Subtarget->is64Bit(), getSubtargetInfo()); + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, SledKind::CUSTOM_LOG); +} + void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, X86MCInstLower &MCIL) { // We want to emit the following pattern: @@ -1436,6 +1476,9 @@ case TargetOpcode::PATCHABLE_TAIL_CALL: return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_LOG_CALL: + return LowerPATCHABLE_LOG_CALL(*MI, MCInstLowering); case X86::MORESTACK_RET: EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); Index: test/CodeGen/X86/xray-custom-log.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/xray-custom-log.ll @@ -0,0 +1,30 @@ +; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @caller() nounwind noinline uwtable { + %logentryptr = alloca i8 + call void @llvm.xray.customlog(i8* %logentryptr) + ret i32 0 +} + +declare void @llvm.xray.customlog(i8*) + +define i32 @caller() nounwind noinline uwtable { +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_0: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp1: +; CHECK: .p2align 1, 0x90 +; CHECK-LABEL: Lxray_sled_3: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp2: + %retval = tail call i32 @callee() +; CHECK: jmp callee # TAILCALL + ret i32 %retval +} +; CHECK: .p2align 4, 0x90 +; CHECK-NEXT: .quad .Lxray_synthetic_1 +; 
CHECK-LABEL: Lxray_synthetic_1: +; CHECK: .quad .Lxray_sled_2 +; CHECK: .quad .Lxray_sled_3