Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -263,6 +263,10 @@ /// \brief This pass lays out funclets contiguously. extern char &FuncletLayoutID; + /// This pass inserts the XRay instrumentation sleds if they are supported by + /// the target platform. + extern char &XRayInstrumentationID; + /// \brief This pass implements the "patchable-function" attribute. extern char &PatchableFunctionID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -334,6 +334,7 @@ void initializeWholeProgramDevirtPass(PassRegistry &); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry &); +void initializeXRayInstrumentationPass(PassRegistry &); } #endif Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -946,6 +946,21 @@ let mayStore = 1; let hasSideEffects = 1; } +def PATCHABLE_FUNCTION_ENTER : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = "# XRay Function Enter."; + let usesCustomInserter = 1; + let hasSideEffects = 0; +} +def PATCHABLE_RET : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins variable_ops); + let AsmString = "# XRay Function Exit."; + let usesCustomInserter = 1; + let hasSideEffects = 1; + let isReturn = 1; +} // Generic opcodes used in GlobalISel. include "llvm/Target/GenericOpcodes.td" Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -55,10 +55,11 @@ void operator=(const TargetInstrInfo &) = delete; public: TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, - unsigned CatchRetOpcode = ~0u) + unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) : CallFrameSetupOpcode(CFSetupOpcode), CallFrameDestroyOpcode(CFDestroyOpcode), - CatchRetOpcode(CatchRetOpcode) {} + CatchRetOpcode(CatchRetOpcode), + ReturnOpcode(ReturnOpcode) {} virtual ~TargetInstrInfo(); @@ -151,6 +152,7 @@ unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } + unsigned getReturnOpcode() const { return ReturnOpcode; } /// Returns the actual stack pointer adjustment made by an instruction /// as part of a call sequence. By default, only call frame setup/destroy @@ -1440,6 +1442,7 @@ private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; + unsigned ReturnOpcode; }; /// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. Index: include/llvm/Target/TargetOpcodes.def =================================================================== --- include/llvm/Target/TargetOpcodes.def +++ include/llvm/Target/TargetOpcodes.def @@ -142,19 +142,28 @@ /// original instruction. HANDLE_TARGET_OPCODE(PATCHABLE_OP, 23) +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. +HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER, 24) + +/// Wraps a return instruction and its operands to enable adding nop sleds +/// either before or after the return. The nop sleds are useful for inserting +/// instrumentation instructions at runtime. +HANDLE_TARGET_OPCODE(PATCHABLE_RET, 25) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. /// Generic ADD instruction. This is an integer add. -HANDLE_TARGET_OPCODE(G_ADD, 24) +HANDLE_TARGET_OPCODE(G_ADD, 26) HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) /// Generic Bitwise-OR instruction. HANDLE_TARGET_OPCODE(G_OR, 25) /// Generic BRANCH instruction. This is an unconditional branch. -HANDLE_TARGET_OPCODE(G_BR, 26) +HANDLE_TARGET_OPCODE(G_BR, 27) // TODO: Add more generic opcodes as we move along. Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -136,6 +136,7 @@ UnreachableBlockElim.cpp VirtRegMap.cpp WinEHPrepare.cpp + XRayInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -57,6 +57,7 @@ initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); + initializeXRayInstrumentationPass(Registry); initializePatchableFunctionPass(Registry); initializeOptimizePHIsPass(Registry); initializePEIPass(Registry); Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -660,6 +660,7 @@ addPass(&StackMapLivenessID, false); addPass(&LiveDebugValuesID, false); + addPass(&XRayInstrumentationID, false); addPass(&PatchableFunctionID, false); AddingMachinePasses = false; Index: lib/CodeGen/XRayInstrumentation.cpp =================================================================== --- /dev/null +++ lib/CodeGen/XRayInstrumentation.cpp @@ -0,0 +1,96 @@ +//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a MachineFunctionPass that inserts the appropriate +// XRay instrumentation instructions. We look for XRay-specific attributes +// on the function to determine whether we should insert the replacement +// operations. +// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { +struct XRayInstrumentation : public MachineFunctionPass { + static char ID; + + XRayInstrumentation() : MachineFunctionPass(ID) { + initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { + auto &F = *MF.getFunction(); + auto InstrAttr = F.getFnAttribute("function-instrument"); + bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && + InstrAttr.isStringAttribute() && + InstrAttr.getValueAsString() == "xray-always"; + Attribute Attr = F.getFnAttribute("xray-instruction-threshold"); + unsigned XRayThreshold = 0; + if (!AlwaysInstrument) { + if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute()) + return false; // XRay threshold attribute not found. + if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) + return false; // Invalid value for threshold. + if (F.size() < XRayThreshold) + return false; // Function is too small. + } + + // FIXME: Do the loop triviality analysis here or in an earlier pass. + + // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the + // MachineFunction. + auto &FirstMBB = *MF.begin(); + auto &FirstMI = *FirstMBB.begin(); + auto *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); + + // Then we look for *all* terminators and returns, then replace those with + // PATCHABLE_RET instructions. + SmallVector Terminators; + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + // FIXME: Handle tail calls here too? + if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + // Replace return instructions with: + // PATCHABLE_RET , ... + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_RET)) + .addImm(T.getOpcode()); + for (auto &MO : T.operands()) + MIB.addOperand(MO); + Terminators.push_back(&T); + break; + } + } + } + + for (auto &I : Terminators) + I->eraseFromParent(); + + return true; +} + +char XRayInstrumentation::ID = 0; +char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; +INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", + false, false); Index: lib/Target/X86/X86AsmPrinter.h =================================================================== --- lib/Target/X86/X86AsmPrinter.h +++ lib/Target/X86/X86AsmPrinter.h @@ -71,6 +71,27 @@ StackMapShadowTracker SMShadowTracker; + // This describes the kind of sled we're storing in the XRay table. + enum class SledKind : uint8_t { + FUNCTION_ENTER = 0, + FUNCTION_EXIT = 1, + TAIL_CALL = 2, + }; + + // The table will contain these structs that point to the sled, the function + // containing the sled, and what kind of sled (and whether they should always + // be instrumented). + struct XRayFunctionEntry { + const MCSymbol *Sled; + const MCSymbol *Function; + SledKind Kind; + bool AlwaysInstrument; + const class Function *Fn; + }; + + // All the sleds to be emitted. + std::vector Sleds; + // All instructions emitted by the X86AsmPrinter should use this helper // method. // @@ -86,10 +107,22 @@ void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI); - public: - explicit X86AsmPrinter(TargetMachine &TM, - std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {} + // XRay-specific lowering for X86. + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, + X86MCInstLower &MCIL); + void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); + + // Helper function that emits the XRay sleds we've collected for a particular + // function. + void EmitXRayTable(); + + // Helper function to record a given XRay sled. + void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); +public: + explicit X86AsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {} const char *getPassName() const override { return "X86 Assembly / Object Emitter"; Index: lib/Target/X86/X86AsmPrinter.cpp =================================================================== --- lib/Target/X86/X86AsmPrinter.cpp +++ lib/Target/X86/X86AsmPrinter.cpp @@ -69,6 +69,9 @@ // Emit the rest of the function body. EmitFunctionBody(); + // Emit the XRay table for this function. + EmitXRayTable(); + // We didn't modify anything. return false; } Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -159,6 +159,7 @@ unsigned Opc = MBBI->getOpcode(); switch (Opc) { default: return 0; + case TargetOpcode::PATCHABLE_RET: case X86::RET: case X86::RETL: case X86::RETQ: Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -117,7 +117,8 @@ : X86::ADJCALLSTACKDOWN32), (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32), - X86::CATCHRET), + X86::CATCHRET, + (STI.is64Bit() ? X86::RETQ : X86::RETL)), Subtarget(STI), RI(STI.getTargetTriple()) { static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -36,9 +36,15 @@ #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + using namespace llvm; namespace { @@ -1018,6 +1024,99 @@ getSubtargetInfo()); } +void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, + SledKind Kind) { + auto Fn = MI.getParent()->getParent()->getFunction(); + auto Attr = Fn->getFnAttribute("function-instrument"); + bool AlwaysInstrument = + Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; + Sleds.emplace_back( + XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn}); +} + +void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, + X86MCInstLower &MCIL) { + // We want to emit the following pattern: + // + // .Lxray_sled_N: + // .palign 2, ... + // jmp .tmpN + // # 9 bytes worth of noops + // .tmpN + // + // We need the 9 bytes because at runtime, we'd be patching over the full 11 + // bytes with the following pattern: + // + // mov %r10, // 6 bytes + // call // 5 bytes + // + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitLabel(CurSled); + OutStreamer->EmitCodeAlignment(4); + auto Target = OutContext.createTempSymbol(); + + // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as + // an operand (computed as an offset from the jmp instruction). + // FIXME: Find another less hacky way do force the relative jump. + OutStreamer->EmitBytes("\xeb\x09"); + EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); +} + +void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, + X86MCInstLower &MCIL) { + // Since PATCHABLE_RET takes the opcode of the return statement as an + // argument, we use that to emit the correct form of the RET that we want. + // i.e. when we see this: + // + // PATCHABLE_RET X86::RET ... + // + // We should emit the RET followed by sleds. + // + // .Lxray_sled_N: + // ret # or equivalent instruction + // # 10 bytes worth of noops + // + // This just makes sure that the alignment for the next instruction is 2. + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitLabel(CurSled); + unsigned OpCode = MI.getOperand(0).getImm(); + MCInst Ret; + Ret.setOpcode(OpCode); + for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) + if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) + Ret.addOperand(MaybeOperand.getValue()); + OutStreamer->EmitInstruction(Ret, getSubtargetInfo()); + EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo()); + recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); +} + +void X86AsmPrinter::EmitXRayTable() { + if (Sleds.empty()) + return; + if (Subtarget->isTargetELF()) { + auto *Section = OutContext.getELFSection( + "xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0, + CurrentFnSym->getName()); + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + OutStreamer->SwitchSection(Section); + for (const auto &Sled : Sleds) { + OutStreamer->EmitSymbolValue(Sled.Sled, 8); + OutStreamer->EmitSymbolValue(CurrentFnSym, 8); + auto Kind = static_cast(Sled.Kind); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast(&Kind), 1)); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); + OutStreamer->EmitZeros(14); + } + OutStreamer->SwitchSection(PrevSection); + } + Sleds.clear(); +} + // Returns instruction preceding MBBI in MachineFunction. // If MBBI is the first instruction of the first basic block, returns null. static MachineBasicBlock::const_iterator @@ -1259,6 +1358,12 @@ case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(*MI, MCInstLowering); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_RET: + return LowerPATCHABLE_RET(*MI, MCInstLowering); + case X86::MORESTACK_RET: EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); return; Index: test/CodeGen/X86/xray-attribute-instrumentation.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -0,0 +1,13 @@ +; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK-LABEL: Lxray_sled_0: +; CHECK-NEXT: .p2align 2, 0x90 +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp0: + ret i32 0 +; CHECK-LABEL: Lxray_sled_1: +; CHECK-NEXT: retq +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +} Index: test/CodeGen/X86/xray-selective-instrumentation-miss.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/xray-selective-instrumentation-miss.ll @@ -0,0 +1,9 @@ +; RUN: llc -mcpu=nehalem < %s | not grep xray_sled_ + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +define i32 @foo() nounwind uwtable "xray-instruction-threshold"="3" { +entry: + ret i32 0 +} Index: test/CodeGen/X86/xray-selective-instrumentation.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/xray-selective-instrumentation.ll @@ -0,0 +1,9 @@ +; RUN: llc -mcpu=nehalem < %s | grep xray_sled_ + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +define i32 @foo() nounwind uwtable "xray-instruction-threshold"="1" { +entry: + ret i32 0 +} Index: utils/TableGen/InstrInfoEmitter.cpp =================================================================== --- utils/TableGen/InstrInfoEmitter.cpp +++ utils/TableGen/InstrInfoEmitter.cpp @@ -428,7 +428,7 @@ OS << "namespace llvm {\n"; OS << "struct " << ClassName << " : public TargetInstrInfo {\n" << " explicit " << ClassName - << "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1);\n" + << "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1, int ReturnOpcode = -1);\n" << " ~" << ClassName << "() override {}\n" << "};\n"; OS << "} // end llvm namespace\n"; @@ -443,8 +443,8 @@ OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n"; OS << "extern const char " << TargetName << "InstrNameData[];\n"; OS << ClassName << "::" << ClassName - << "(int CFSetupOpcode, int CFDestroyOpcode, int CatchRetOpcode)\n" - << " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode, CatchRetOpcode) {\n" + << "(int CFSetupOpcode, int CFDestroyOpcode, int CatchRetOpcode, int ReturnOpcode)\n" + << " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode, CatchRetOpcode, ReturnOpcode) {\n" << " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, " << NumberedInstructions.size() << ");\n}\n";