Index: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h
+++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h
@@ -226,6 +226,7 @@
     FUNCTION_EXIT = 1,
     TAIL_CALL = 2,
     LOG_ARGS_ENTER = 3,
+    CUSTOM_EVENT = 4,
   };
 
   // The table will contain these structs that point to the sled, the function
Index: llvm/trunk/include/llvm/CodeGen/FastISel.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/FastISel.h
+++ llvm/trunk/include/llvm/CodeGen/FastISel.h
@@ -506,6 +506,7 @@
   bool selectCast(const User *I, unsigned Opcode);
   bool selectExtractValue(const User *I);
   bool selectInsertValue(const User *I);
+  bool selectXRayCustomEvent(const CallInst *II);
 
 private:
   /// \brief Handle PHI nodes in successor blocks.
Index: llvm/trunk/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td
+++ llvm/trunk/include/llvm/IR/Intrinsics.td
@@ -795,6 +795,14 @@
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
+// Xray intrinsics
+//===----------------------------------------------------------------------===//
+// Custom event logging for x-ray.
+// Takes a pointer to a string and the length of the string.
+def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+                                     [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+//===----------------------------------------------------------------------===//
+
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
 //
Index: llvm/trunk/include/llvm/Target/Target.td
===================================================================
--- llvm/trunk/include/llvm/Target/Target.td
+++ llvm/trunk/include/llvm/Target/Target.td
@@ -1002,6 +1002,16 @@
   let hasSideEffects = 1;
   let isReturn = 1;
 }
+def PATCHABLE_EVENT_CALL : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptr_rc:$event, i8imm:$size);
+  let AsmString = "# XRay Custom Event Log.";
+  let usesCustomInserter = 1;
+  let isCall = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
 def FENTRY_CALL : Instruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);
Index: llvm/trunk/include/llvm/Target/TargetOpcodes.def
===================================================================
--- llvm/trunk/include/llvm/Target/TargetOpcodes.def
+++ llvm/trunk/include/llvm/Target/TargetOpcodes.def
@@ -182,6 +182,10 @@
 /// PATCHABLE_RET which specifically only works for return instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
 
+/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be
+/// patched to insert instrumentation instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
+
 /// The following generic opcodes are not supposed to appear after ISel.
 /// This is something we might want to relax, but for now, this is convenient
 /// to produce diagnostics.
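
Note (not part of the patch): the new int_xray_customevent definition above surfaces in C++ as Intrinsic::xray_customevent, so a frontend or pass can emit calls to it through IRBuilder. The sketch below is illustrative only; the helper name and the way the operands are obtained are assumptions, not code from this patch.

    // Minimal sketch (assumed helper): emit a call to llvm.xray.customevent
    // for a given i8* buffer pointer and i32 length.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    static void emitXRayCustomEvent(llvm::IRBuilder<> &Builder, llvm::Module &M,
                                    llvm::Value *EventPtr, llvm::Value *EventSize) {
      // The intrinsic is not overloaded, so no explicit type arguments are needed.
      llvm::Function *EventIntrin =
          llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::xray_customevent);
      // EventPtr must be i8* and EventSize i32, matching the .td signature above.
      Builder.CreateCall(EventIntrin, {EventPtr, EventSize});
    }
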
Index: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -861,6 +861,25 @@
   return true;
 }
 
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+  // Insert the Patchable Event Call instruction, which gets lowered properly.
+  return true;
+}
+
+
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
 static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
@@ -1252,6 +1271,9 @@
   case Intrinsic::experimental_patchpoint_void:
   case Intrinsic::experimental_patchpoint_i64:
     return selectPatchpoint(II);
+
+  case Intrinsic::xray_customevent:
+    return selectXRayCustomEvent(II);
   }
 
   return fastLowerIntrinsicCall(II);
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5703,7 +5703,37 @@
     setValue(&I, N);
     return nullptr;
   }
+  case Intrinsic::xray_customevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+    // We want to say that we always want the arguments in registers.
+    SDValue LogEntryVal = getValue(I.getArgOperand(0));
+    SDValue StrSizeVal = getValue(I.getArgOperand(1));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and so that register allocation
+    // can see that some registers may be assumed clobbered and must be
+    // preserved across calls to the intrinsic.
+    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+                                           DL, NodeTys, Ops);
+    SDValue patchableNode = SDValue(MN, 0);
+    DAG.setRoot(patchableNode);
+    setValue(&I, patchableNode);
+    return nullptr;
+  }
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return nullptr;
Index: llvm/trunk/lib/Target/X86/X86AsmPrinter.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86AsmPrinter.h
+++ llvm/trunk/lib/Target/X86/X86AsmPrinter.h
@@ -91,6 +91,7 @@
                                      X86MCInstLower &MCIL);
   void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+  void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 
   void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -26517,6 +26517,10 @@
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);
+
+  case TargetOpcode::PATCHABLE_EVENT_CALL:
+    // Do nothing here; this is handled in the XRay instrumentation pass.
+    return BB;
 
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
@@ -1040,6 +1040,83 @@
                     getSubtargetInfo());
 }
 
+void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
+                                              X86MCInstLower &MCIL) {
+  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
+
+  // We want to emit the following pattern, which follows the x86 calling
+  // convention to prepare for the trampoline call to be patched in.
+  //
+  //   <args placement according SysV64 calling convention>
+  //   .p2align 1, ...
+  //   .Lxray_event_sled_N:
+  //     jmp +N                      // jump across the call instruction
+  //     callq __xray_CustomEvent    // force relocation to symbol
+  //   <args cleanup, jump to here>
+  //
+  // The relative jump needs to jump forward 24 bytes:
+  // 10 (args) + 5 (nops) + 9 (cleanup)
+  //
+  // After patching, it would look something like:
+  //
+  //   nopw (2-byte nop)
+  //   callq __xray_CustomEvent    // already lowered
+  //
+  // ---
+  // First we emit the label and the jump.
+  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
+  OutStreamer->AddComment("# XRay Custom Event Log");
+  OutStreamer->EmitCodeAlignment(2);
+  OutStreamer->EmitLabel(CurSled);
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset
+  // as an operand (computed as an offset from the jmp instruction).
+  // FIXME: Find another less hacky way to force the relative jump.
+  OutStreamer->EmitBytes("\xeb\x14");
+
+  // The SysV64 calling convention places the first two arguments in %rdi and
+  // %rsi -- so we only work with those.
+  unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};
+
+  // Because we will use %rax, we preserve that across the call.
+  EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+
+  // Then we put the operands in the %rdi and %rsi registers.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+      if (Op->isImm())
+        EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+                                    .addReg(UsedRegs[I])
+                                    .addImm(Op->getImm()));
+      else if (Op->isReg()) {
+        if (Op->getReg() != UsedRegs[I])
+          EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
+                                      .addReg(UsedRegs[I])
+                                      .addReg(Op->getReg()));
+        else
+          EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
+      }
+    }
+
+  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
+  // name of the trampoline to be implemented by the XRay runtime. We put this
+  // explicitly in the %rax register.
+  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
+  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+  EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+                              .addReg(X86::RAX)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+  // Emit the call instruction.
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));
+
+  // Restore caller-saved and used registers.
+  OutStreamer->AddComment("xray custom event end.");
+  EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+
+  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1415,6 +1492,9 @@
   case TargetOpcode::PATCHABLE_TAIL_CALL:
     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
+  case TargetOpcode::PATCHABLE_EVENT_CALL:
+    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
 
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
Index: llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
+++ llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
@@ -0,0 +1,23 @@
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
+    %eventptr = alloca i8
+    %eventsize = alloca i32
+    store i32 3, i32* %eventsize
+    %val = load i32, i32* %eventsize
+    call void @llvm.xray.customevent(i8* %eventptr, i32 %val)
+    ; CHECK-LABEL: Lxray_event_sled_0:
+    ; CHECK-NEXT:  .ascii "\353\024
+    ; CHECK-NEXT:  pushq %rax
+    ; CHECK-NEXT:  movq {{.*}}, %rdi
+    ; CHECK-NEXT:  movq {{.*}}, %rsi
+    ; CHECK-NEXT:  movabsq $__xray_CustomEvent, %rax
+    ; CHECK-NEXT:  callq *%rax
+    ; CHECK-NEXT:  popq %rax
+    ret i32 0
+}
+; CHECK:       .section {{.*}}xray_instr_map
+; CHECK-LABEL: Lxray_synthetic_0:
+; CHECK:       .quad {{.*}}xray_event_sled_0
+
+declare void @llvm.xray.customevent(i8*, i32)
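
For context beyond this patch: a companion clang change (not shown here) exposes the intrinsic as a __xray_customevent builtin, and the XRay runtime is expected to supply the __xray_CustomEvent trampoline that the sled calls once patched. A hedged sketch of what end-user code might look like under those assumptions:

    // Illustrative only: assumes the separate clang builtin __xray_customevent
    // and an XRay-instrumented build (e.g. -fxray-instrument); neither is part
    // of this LLVM-side patch.
    #include <cstdio>

    [[clang::xray_always_instrument]] void LogPhase(int Phase) {
      char Buf[32];
      int Len = std::snprintf(Buf, sizeof(Buf), "phase=%d", Phase);
      // The builtin lowers to llvm.xray.customevent(i8*, i32); on x86_64 Linux
      // this patch turns that call into a PATCHABLE_EVENT_CALL sled, which the
      // runtime can patch to call __xray_CustomEvent with the buffer pointer in
      // %rdi and the length in %rsi.
      __xray_customevent(Buf, Len);
    }
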