Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -238,6 +238,7 @@
     TAIL_CALL = 2,
     LOG_ARGS_ENTER = 3,
     CUSTOM_EVENT = 4,
+    TYPED_EVENT = 5,
   };
 
   // The table will contain these structs that point to the sled, the function
@@ -644,8 +645,7 @@
   GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C);
 
   /// Emit GlobalAlias or GlobalIFunc.
-  void emitGlobalIndirectSymbol(Module &M,
-                                const GlobalIndirectSymbol& GIS);
+  void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
   void setupCodePaddingContext(const MachineBasicBlock &MBB,
                                MCCodePaddingContext &Context) const;
 };
Index: include/llvm/CodeGen/FastISel.h
===================================================================
--- include/llvm/CodeGen/FastISel.h
+++ include/llvm/CodeGen/FastISel.h
@@ -535,6 +535,7 @@
   bool selectExtractValue(const User *I);
   bool selectInsertValue(const User *I);
   bool selectXRayCustomEvent(const CallInst *II);
+  bool selectXRayTypedEvent(const CallInst *II);
 
 private:
   /// \brief Handle PHI nodes in successor blocks.
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -2547,6 +2547,11 @@
   /// details.
   MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay typed event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
+                                        MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -894,6 +894,10 @@
 // Takes a pointer to a string and the length of the string.
 def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                                      [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+// Typed event logging for x-ray.
+// Takes a numeric type tag, a pointer to a string, and the length of the string.
+def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty],
+                                    [NoCapture<1>, ReadOnly<1>, IntrWriteMem]>;
 //===----------------------------------------------------------------------===//
 
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
Index: include/llvm/Support/TargetOpcodes.def
===================================================================
--- include/llvm/Support/TargetOpcodes.def
+++ include/llvm/Support/TargetOpcodes.def
@@ -183,10 +183,14 @@
 /// PATCHABLE_RET which specifically only works for return instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
 
-/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be
-/// patched to insert instrumentation instructions.
+/// Wraps a logging call and its arguments with nop sleds. At runtime, this can
+/// be patched to insert instrumentation instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
 
+/// Wraps a typed logging call and its arguments with nop sleds. At runtime,
+/// this can be patched to insert instrumentation instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL)
+
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 
 /// The following generic opcodes are not supposed to appear after ISel.
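Note (not part of the patch): for orientation, given the int_xray_typedevent definition added above, a minimal sketch of how a frontend could emit the intrinsic through IRBuilder follows; M, Builder, TypeId, EventPtr, and EventSize are placeholder names for the module, the builder, and already-computed argument values, not identifiers from this change.

  // Sketch only: fetch the llvm.xray.typedevent declaration for the module and
  // call it with a 16-bit type tag, a payload pointer, and the payload length.
  llvm::Function *TypedEventFn =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::xray_typedevent);
  llvm::Value *Args[] = {TypeId, EventPtr, EventSize};
  Builder.CreateCall(TypedEventFn, Args);

The resulting call is what the FastISel and SelectionDAG changes below pick up and turn into a PATCHABLE_TYPED_EVENT_CALL.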
Index: include/llvm/Target/Target.td
===================================================================
--- include/llvm/Target/Target.td
+++ include/llvm/Target/Target.td
@@ -1133,6 +1133,16 @@
   let mayStore = 1;
   let hasSideEffects = 1;
 }
+def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i16imm:$type, ptr_rc:$event, i32imm:$size);
+  let AsmString = "# XRay Typed Event Log.";
+  let usesCustomInserter = 1;
+  let isCall = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
 def FENTRY_CALL : StandardPseudoInstruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);
Index: lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/FastISel.cpp
+++ lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1039,6 +1039,26 @@
   return true;
 }
 
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction, which gets lowered properly.
+  return true;
+}
+
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
@@ -1433,6 +1453,8 @@
 
   case Intrinsic::xray_customevent:
     return selectXRayCustomEvent(II);
+  case Intrinsic::xray_typedevent:
+    return selectXRayTypedEvent(II);
   }
 
   return fastLowerIntrinsicCall(II);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6067,6 +6067,41 @@
     setValue(&I, patchableNode);
     return nullptr;
   }
+  case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    // It's unclear to me how manipulating the selection DAG here forces callers
+    // to provide arguments in registers instead of on the stack.
+    SDValue LogTypeId = getValue(I.getArgOperand(0));
+    SDValue LogEntryVal = getValue(I.getArgOperand(1));
+    SDValue StrSizeVal = getValue(I.getArgOperand(2));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogTypeId);
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly and register allocation can see
+    // that some registers may be assumed clobbered and have to be preserved
+    // across calls to the intrinsic.
+    MachineSDNode *MN = DAG.getMachineNode(
+        TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+    SDValue patchableNode = SDValue(MN, 0);
+    DAG.setRoot(patchableNode);
+    setValue(&I, patchableNode);
+    return nullptr;
+  }
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return nullptr;
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1001,6 +1001,21 @@
   return MBB;
 }
 
+MachineBasicBlock *
+TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
+                                       MachineBasicBlock *MBB) const {
+  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
+         "Called emitXRayTypedEvent on the wrong MI!");
+  auto &MF = *MI.getMF();
+  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+    MIB.add(MI.getOperand(OpIdx));
+
+  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 // This function is in TargetLowering because it uses RegClassForVT which would
Index: lib/Target/X86/X86AsmPrinter.h
===================================================================
--- lib/Target/X86/X86AsmPrinter.h
+++ lib/Target/X86/X86AsmPrinter.h
@@ -95,6 +95,8 @@
   void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+  void LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
+                                       X86MCInstLower &MCIL);
 
   void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -28219,6 +28219,9 @@
   case TargetOpcode::PATCHABLE_EVENT_CALL:
     return emitXRayCustomEvent(MI, BB);
 
+  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+    return emitXRayTypedEvent(MI, BB);
+
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
     // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -1095,6 +1095,9 @@
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
       assert(Op->isReg() && "Only support arguments in registers");
+      // FIXME: Add reordering to the stashing so we never clobber a register
+      // before we've stashed it, e.g. RDI may be the third argument of the
+      // caller.
       if (Op->getReg() != UsedRegs[I]) {
         UsedMask[I] = true;
         EmitAndCountInstruction(
@@ -1133,6 +1136,103 @@
   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
 }
 
+void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
+                                                    X86MCInstLower &MCIL) {
+  assert(Subtarget->is64Bit() && "XRay typed events only supported on x86-64");
+
+  // We want to emit the following pattern, which follows the x86 calling
+  // convention to prepare for the trampoline call to be patched in.
+  //
+  //   .p2align 1, ...
+  // .Lxray_event_sled_N:
+  //   jmp +N                        // jump across the instrumentation sled
+  //   ...                           // set up arguments in registers
+  //   callq __xray_TypedEvent@plt   // force dependency to symbol
+  //   ...
+  //   <jump here>
+  //
+  // After patching, it would look something like:
+  //
+  //   nopw (2-byte nop)
+  //   ...
+  //   callq __xray_TypedEvent  // already lowered
+  //   ...
+  //
+  // ---
+  // First we emit the label and the jump.
+  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
+  OutStreamer->AddComment("# XRay Typed Event Log");
+  OutStreamer->EmitCodeAlignment(2);
+  OutStreamer->EmitLabel(CurSled);
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+  // an operand (computed as an offset from the jmp instruction).
+  // FIXME: Find another, less hacky way to force the relative jump.
+  OutStreamer->EmitBinaryData("\xeb\x14");
+
+  // An x86-64 convention may place three arguments into %rcx, %rdx, and %r8,
+  // so we'll work with those. Or we may be called via SystemV, in which case
+  // we don't have to do any translation.
+  unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
+  bool UsedMask[] = {false, false, false};
+
+  // Will fill out the source registers in the loop.
+  unsigned SrcRegs[] = {0, 0, 0};
+
+  // Then we put the operands in the SystemV registers. We spill the values in
+  // the registers before we clobber them, and mark them as used in UsedMask.
+  // In case the arguments are already in the correct register, we emit nops
+  // appropriately sized to keep the sled the same size in every situation.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+      // TODO: Is register-only support adequate?
+      assert(Op->isReg() && "Only supports arguments in registers");
+      SrcRegs[I] = Op->getReg();
+      if (SrcRegs[I] != DestRegs[I]) {
+        UsedMask[I] = true;
+        EmitAndCountInstruction(
+            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
+      } else {
+        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+      }
+    }
+
+  // In the above loop we only stash all of the destination registers or emit
+  // nops if the arguments are already in the right place. The actual moving is
+  // postponed until after all the registers are stashed, so nothing is
+  // clobbered. We've already added nops to account for the size of mov and
+  // push if the register is in the right place, so we only have to worry about
+  // emitting movs.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (UsedMask[I])
+      EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                                   .addReg(DestRegs[I])
+                                  .addReg(SrcRegs[I]));
+
+  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
+  // name of the trampoline to be implemented by the XRay runtime.
+  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
+  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+  if (isPositionIndependent())
+    TOp.setTargetFlags(X86II::MO_PLT);
+
+  // Emit the call instruction.
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+  // Restore caller-saved and used registers.
+  for (unsigned I = sizeof UsedMask; I-- > 0;)
+    if (UsedMask[I])
+      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
+    else
+      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+
+  OutStreamer->AddComment("xray typed event end.");
+
+  // Record the sled version.
+  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1607,10 +1707,13 @@
   case TargetOpcode::PATCHABLE_TAIL_CALL:
     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
-
+
   case TargetOpcode::PATCHABLE_EVENT_CALL:
     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
 
+  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
+
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
     return;
Index: test/CodeGen/X86/xray-typed-event-log.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/xray-typed-event-log.ll
@@ -0,0 +1,45 @@
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu \
+; RUN:    -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+
+define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
+    %eventptr = alloca i8
+    %eventsize = alloca i32
+    %eventtype = alloca i16
+    store i16 6, i16* %eventtype
+    %type = load i16, i16* %eventtype
+    store i32 3, i32* %eventsize
+    %val = load i32, i32* %eventsize
+    call void @llvm.xray.typedevent(i16 %type, i8* %eventptr, i32 %val)
+    ; CHECK-LABEL: Lxray_typed_event_sled_0:
+    ; CHECK:       .byte 0xeb, 0x14
+    ; CHECK-NEXT:  pushq %rdi
+    ; CHECK-NEXT:  pushq %rsi
+    ; CHECK-NEXT:  pushq %rdx
+    ; CHECK-NEXT:  movq {{.*}}, %rdi
+    ; CHECK-NEXT:  movq {{.*}}, %rsi
+    ; CHECK-NEXT:  movq {{.*}}, %rdx
+    ; CHECK-NEXT:  callq __xray_TypedEvent
+    ; CHECK-NEXT:  popq %rdx
+    ; CHECK-NEXT:  popq %rsi
+    ; CHECK-NEXT:  popq %rdi
+
+    ; PIC-LABEL: Lxray_typed_event_sled_0:
+    ; PIC:       .byte 0xeb, 0x14
+    ; PIC-NEXT:  pushq %rdi
+    ; PIC-NEXT:  pushq %rsi
+    ; PIC-NEXT:  pushq %rdx
+    ; PIC-NEXT:  movq {{.*}}, %rdi
+    ; PIC-NEXT:  movq {{.*}}, %rsi
+    ; PIC-NEXT:  movq {{.*}}, %rdx
+    ; PIC-NEXT:  callq __xray_TypedEvent@PLT
+    ; PIC-NEXT:  popq %rdx
+    ; PIC-NEXT:  popq %rsi
+    ; PIC-NEXT:  popq %rdi
+    ret i32 0
+}
+; CHECK-LABEL: xray_instr_map
+; CHECK-LABEL: Lxray_sleds_start0:
+; CHECK:       .quad {{.*}}xray_typed_event_sled_0
+
+declare void @llvm.xray.typedevent(i16, i8*, i32)
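Note (not part of the patch): the sled emitted above ultimately hands the three operands to the __xray_TypedEvent trampoline, which the XRay runtime is expected to route to a user-installed handler. A minimal sketch of such a handler is below; the signature mirrors the intrinsic operands (16-bit type tag, payload pointer, payload size), but the registration entry point mentioned in the comment is an assumption about the compiler-rt side and is not defined by this change.

  #include <cstdint>
  #include <cstdio>

  // Receives the values passed to llvm.xray.typedevent: a 16-bit type tag,
  // a pointer to the payload bytes, and the payload size.
  void handleTypedEvent(std::uint16_t EventType, const void *Payload,
                        std::size_t PayloadSize) {
    std::printf("typed event %u: %zu bytes\n",
                static_cast<unsigned>(EventType), PayloadSize);
  }

  // Hypothetical wiring on the runtime side, e.g. something along the lines of
  // __xray_set_typedevent_handler(handleTypedEvent), would install the handler.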