Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -188,6 +188,34 @@
   MCSymbol *getSymbol(const GlobalValue *GV) const;
 
   //===------------------------------------------------------------------===//
+  // XRay instrumentation implementation.
+  //===------------------------------------------------------------------===//
+public:
+  // This describes the kind of sled we're storing in the XRay table.
+  enum class SledKind : uint8_t {
+    FUNCTION_ENTER = 0,
+    FUNCTION_EXIT = 1,
+    TAIL_CALL = 2,
+  };
+
+  // The table will contain these structs that point to the sled, the function
+  // containing the sled, and what kind of sled (and whether they should always
+  // be instrumented).
+  struct XRayFunctionEntry {
+    const MCSymbol *Sled;     // Label marking the sled.
+    const MCSymbol *Function; // Symbol of the function containing the sled.
+    SledKind Kind;            // Which kind of sled this is.
+    bool AlwaysInstrument;    // "function-instrument"="xray-always" was set.
+    const class Function *Fn; // IR function the sled was recorded for.
+  };
+
+  // All the sleds to be emitted.
+  std::vector<XRayFunctionEntry> Sleds;
+
+  // Helper function to record a given XRay sled (appends an entry to Sleds).
+  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
+
+  //===------------------------------------------------------------------===//
   // MachineFunctionPass Implementation.
   //===------------------------------------------------------------------===//
 
Index: include/llvm/Target/Target.td
===================================================================
--- include/llvm/Target/Target.td
+++ include/llvm/Target/Target.td
@@ -956,11 +956,19 @@
 def PATCHABLE_RET : Instruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);
-  let AsmString = "# XRay Function Exit.";
+  let AsmString = "# XRay Function Patchable RET.";
   let usesCustomInserter = 1;
   let hasSideEffects = 1;
   let isReturn = 1;
 }
+def PATCHABLE_FUNCTION_EXIT : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "# XRay Function Exit.";
+  let usesCustomInserter = 1;
+  let hasSideEffects = 0; // FIXME: is this correct? (PATCHABLE_RET uses 1.)
+  let isReturn = 0; // The original return instruction follows this marker.
+}
 
 // Generic opcodes used in GlobalISel.
 include "llvm/Target/GenericOpcodes.td"
Index: include/llvm/Target/TargetOpcodes.def
===================================================================
--- include/llvm/Target/TargetOpcodes.def
+++ include/llvm/Target/TargetOpcodes.def
@@ -153,8 +153,25 @@
 /// Wraps a return instruction and its operands to enable adding nop sleds
 /// either before or after the return. The nop sleds are useful for inserting
 /// instrumentation instructions at runtime.
+/// The patch here replaces the return instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_RET)
 
+/// This is a marker instruction which gets translated into a nop sled, useful
+/// for inserting instrumentation instructions at runtime.
+/// The sled here is placed in front of the return instruction.
+/// The x86_64 approach is not possible for ARM because ARM has multiple
+/// return instructions. Furthermore, the CPU allows parameterized and even
+/// conditional return instructions. In the current ARM implementation we are
+/// making use of the fact that currently LLVM doesn't seem to generate
+/// conditional return instructions.
+/// On ARM, the same instruction can be used for popping multiple registers
+/// from the stack and returning (it just pops the pc register too), and LLVM
+/// generates it sometimes. So we can't insert the sled between this stack
+/// adjustment and the return without splitting the original instruction into 2
+/// instructions. So on ARM, rather than jumping into the exit trampoline, we
+/// call it; it does the tracing, preserves the stack and returns.
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
+
 /// The following generic opcodes are not supposed to appear after ISel.
 /// This is something we might want to relax, but for now, this is convenient
 /// to produce diagnostics.
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2606,3 +2606,13 @@
 AsmPrinterHandler::~AsmPrinterHandler() {}
 
 void AsmPrinterHandler::markFunctionEnd() {}
+
+void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+  SledKind Kind) { // Appends one XRayFunctionEntry for this sled to Sleds.
+  auto Fn = MI.getParent()->getParent()->getFunction(); // IR function of MI.
+  auto Attr = Fn->getFnAttribute("function-instrument");
+  bool AlwaysInstrument = // True only for the "xray-always" attribute value.
+    Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+  Sleds.emplace_back(
+    XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+}
Index: lib/CodeGen/XRayInstrumentation.cpp
===================================================================
--- lib/CodeGen/XRayInstrumentation.cpp
+++ lib/CodeGen/XRayInstrumentation.cpp
@@ -34,7 +34,65 @@
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  // Replace the original RET instruction with the exit sled code ("patchable ret"
+  // pseudo-instruction), so that at runtime XRay can replace the sled with
+  // code jumping to the XRay trampoline, which calls the tracing handler and,
+  // in the end, issues the RET instruction.
+  // This is the approach to go on CPUs which have a single RET instruction,
+  // like x86/x86_64.
+  void replaceRetWithPatchableRet(MachineFunction &MF, const TargetInstrInfo *TII);
+  // Prepend the original return instruction with the exit sled code ("patchable
+  // function exit" pseudo-instruction), preserving the original return instruction
+  // just after the exit sled code.
+  // This is the approach to go on CPUs which have multiple options for the return
+  // instruction, like ARM. For such CPUs we can't just jump into the XRay trampoline
+  // and issue a single return instruction there. We rather have to call the
+  // trampoline and return from it to the original return instruction of the
+  // function being instrumented.
+  void prependRetWithPatchableExit(MachineFunction &MF, const TargetInstrInfo *TII);
 };
+} // anonymous namespace
+
+void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, const TargetInstrInfo *TII)
+{
+  // We look for *all* terminators and returns, then replace those with
+  // PATCHABLE_RET instructions.
+  SmallVector<MachineInstr *, 4> Terminators;
+  for (auto &MBB : MF) {
+    for (auto &T : MBB.terminators()) {
+      // FIXME: Handle tail calls here too?
+      if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+        // Replace return instructions with:
+        //   PATCHABLE_RET <Opcode>, <Operand>...
+        auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
+                           TII->get(TargetOpcode::PATCHABLE_RET))
+                       .addImm(T.getOpcode());
+        for (auto &MO : T.operands())
+          MIB.addOperand(MO);
+        Terminators.push_back(&T);
+        break;
+      }
+    }
+  }
+
+  for (auto &I : Terminators)
+    I->eraseFromParent();
+}
+
+void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, const TargetInstrInfo *TII)
+{
+  for (auto &MBB : MF) {
+    for (auto &T : MBB.terminators()) {
+      if (T.isReturn()) {
+        // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT
+        BuildMI(MBB, T, T.getDebugLoc(),
+                TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
+        break; // FIXME: is this correct? Can't a MachineBasicBlock have multiple return instructions?
+      }
+    }
+  }
 }
 
 bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
@@ -64,29 +122,15 @@
   BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
           TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
 
-  // Then we look for *all* terminators and returns, then replace those with
-  // PATCHABLE_RET instructions.
-  SmallVector<MachineInstr *, 4> Terminators;
-  for (auto &MBB : MF) {
-    for (auto &T : MBB.terminators()) {
-      // FIXME: Handle tail calls here too?
-      if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
-        // Replace return instructions with:
-        //   PATCHABLE_RET <Opcode>, <Operand>...
-        auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
-                           TII->get(TargetOpcode::PATCHABLE_RET))
-                       .addImm(T.getOpcode());
-        for (auto &MO : T.operands())
-          MIB.addOperand(MO);
-        Terminators.push_back(&T);
-        break;
-      }
-    }
+  switch (MF.getTarget().getTargetTriple().getArch()) {
+  // List here the architectures which don't have a single return instruction
+  case Triple::ArchType::arm:
+    prependRetWithPatchableExit(MF, TII);
+    break;
+  // Architectures that have a single return instruction (such as RETQ on x86_64)
+  default:
+    replaceRetWithPatchableRet(MF, TII);
   }
-
-  for (auto &I : Terminators)
-    I->eraseFromParent();
-
   return true;
 }
 
Index: lib/Target/ARM/ARMAsmPrinter.h
===================================================================
--- lib/Target/ARM/ARMAsmPrinter.h
+++ lib/Target/ARM/ARMAsmPrinter.h
@@ -94,7 +94,19 @@
   // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
 
+  //===------------------------------------------------------------------===//
+  // XRay implementation
+  //===------------------------------------------------------------------===//
+public:
+  // XRay-specific lowering for ARM.
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+  // Helper function that emits the table entries for the XRay sleds we've
+  // collected for a particular function, then clears the collected list.
+  void EmitXRayTable();
+
 private:
+  void EmitSled(const MachineInstr &MI, SledKind Kind); // Emit + record a sled.
 
   // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
   void emitAttributes();
Index: lib/Target/ARM/ARMAsmPrinter.cpp
===================================================================
--- lib/Target/ARM/ARMAsmPrinter.cpp
+++ lib/Target/ARM/ARMAsmPrinter.cpp
@@ -150,6 +150,9 @@
   // Emit the rest of the function body.
   EmitFunctionBody();
 
+  // Emit the XRay table for this function (no-op if no sleds were recorded).
+  EmitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
@@ -1976,6 +1979,16 @@
                                        .addReg(0));
     return;
   }
+  case ARM::PATCHABLE_FUNCTION_ENTER:
+  {
+    LowerPATCHABLE_FUNCTION_ENTER(*MI);
+    return;
+  }
+  case ARM::PATCHABLE_FUNCTION_EXIT:
+  {
+    LowerPATCHABLE_FUNCTION_EXIT(*MI);
+    return;
+  }
   }
 
   MCInst TmpInst;
Index: lib/Target/ARM/ARMMCInstLower.cpp
===================================================================
--- lib/Target/ARM/ARMMCInstLower.cpp
+++ lib/Target/ARM/ARMMCInstLower.cpp
@@ -21,6 +21,10 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCInstBuilder.h"
 
 using namespace llvm;
 
@@ -150,3 +154,85 @@
     }
   }
 }
+
+// FIXME: Is there a ready way to emit NOPs on ARM?
+static void Emit4ByteNops(MCStreamer& OS, int NumInstructions, const MCSubtargetInfo &STI)
+{
+  (void)STI; // would be useful for OS.EmitInstruction(MCInstBuilder(ARM::NOP), STI);
+  for (int I = 1; I <= NumInstructions; I++)
+  {
+    OS.EmitBytes(StringRef( /*Little-endian!*/ "\x00\xF0\x20\xE3", 4));
+  }
+}
+
+void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+  // We want to emit the following pattern:
+  //
+  //   ALIGN
+  // .Lxray_sled_N:
+  //   B #20
+  //   ; 6 NOP instructions (24 bytes)
+  // .tmpN
+  //
+  // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
+  // over the full 28 bytes (7 instructions) with the following pattern:
+  //
+  //   PUSH{ r0, lr }
+  //   MOVW r0, #<lower 16 bits of function ID>
+  //   MOVT r0, #<higher 16 bits of function ID>
+  //   MOVW ip, #<lower 16 bits of trampoline address>
+  //   MOVT ip, #<higher 16 bits of trampoline address>
+  //   BLX ip
+  //   POP{ r0, lr }
+  // NOTE(review): the #<...> operand text above was reconstructed - confirm.
+  OutStreamer->EmitCodeAlignment(4);
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitLabel(CurSled);
+  auto Target = OutContext.createTempSymbol();
+
+  // Emit "B #20" instruction, which jumps over the next 24 bytes (because
+  // register pc is 8 bytes ahead of the jump instruction by the moment CPU
+  // is executing it).
+  // FIXME: Find another less hacky way to force the relative jump.
+  OutStreamer->EmitBytes(StringRef( /*Little-endian!*/ "\x05\x00\x00\xEA", 4));
+  Emit4ByteNops(*OutStreamer, 6, getSubtargetInfo());
+  OutStreamer->EmitLabel(Target);
+  recordSled(CurSled, MI, Kind);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void ARMAsmPrinter::EmitXRayTable()
+{
+  if (Sleds.empty())
+    return;
+  if (Subtarget->isTargetELF()) { // Only ELF output gets a table here.
+    auto *Section = OutContext.getELFSection(
+      "xray_instr_map", ELF::SHT_PROGBITS,
+      ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
+      CurrentFnSym->getName());
+    auto PrevSection = OutStreamer->getCurrentSectionOnly();
+    OutStreamer->SwitchSection(Section);
+    for (const auto &Sled : Sleds) { // One 16-byte entry per recorded sled.
+      OutStreamer->EmitSymbolValue(Sled.Sled, 4);
+      OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
+      auto Kind = static_cast<uint8_t>(Sled.Kind);
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Kind), 1));
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+      OutStreamer->EmitZeros(6); // Pad 4 + 4 + 1 + 1 bytes up to 16.
+    }
+    OutStreamer->SwitchSection(PrevSection);
+  }
+  Sleds.clear(); // Sleds are collected and emitted per function.
+}
Index: lib/Target/X86/X86AsmPrinter.h
===================================================================
--- lib/Target/X86/X86AsmPrinter.h
+++ lib/Target/X86/X86AsmPrinter.h
@@ -71,27 +71,6 @@
 
   StackMapShadowTracker SMShadowTracker;
 
-  // This describes the kind of sled we're storing in the XRay table.
-  enum class SledKind : uint8_t {
-    FUNCTION_ENTER = 0,
-    FUNCTION_EXIT = 1,
-    TAIL_CALL = 2,
-  };
-
-  // The table will contain these structs that point to the sled, the function
-  // containing the sled, and what kind of sled (and whether they should always
-  // be instrumented).
-  struct XRayFunctionEntry {
-    const MCSymbol *Sled;
-    const MCSymbol *Function;
-    SledKind Kind;
-    bool AlwaysInstrument;
-    const class Function *Fn;
-  };
-
-  // All the sleds to be emitted.
-  std::vector<XRayFunctionEntry> Sleds;
-
   // All instructions emitted by the X86AsmPrinter should use this helper
   // method.
   //
@@ -117,8 +96,6 @@
   // function.
   void EmitXRayTable();
 
-  // Helper function to record a given XRay sled.
-  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
 public:
   explicit X86AsmPrinter(TargetMachine &TM,
                          std::unique_ptr<MCStreamer> Streamer)
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -1023,16 +1023,6 @@
                             getSubtargetInfo());
 }
 
-void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
-                               SledKind Kind) {
-  auto Fn = MI.getParent()->getParent()->getFunction();
-  auto Attr = Fn->getFnAttribute("function-instrument");
-  bool AlwaysInstrument =
-      Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
-  Sleds.emplace_back(
-      XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
-}
-
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
Index: test/CodeGen/ARM/xray-attribute-instrumentation.ll
===================================================================
--- test/CodeGen/ARM/xray-attribute-instrumentation.ll
+++ test/CodeGen/ARM/xray-attribute-instrumentation.ll
@@ -0,0 +1,26 @@
+; RUN: llc -filetype=asm -o - -mtriple=arm-unknown-linux-gnu < %s | FileCheck %s
+
+; Each sled is "B #20" (bytes 05 00 00 EA) followed by six NOPs (00 F0 20 E3),
+; emitted as raw bytes: one sled before the body, one before the return.
+define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: Lxray_sled_0:
+; CHECK-NEXT:  .ascii "\005\000\000\352"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-LABEL: Ltmp0:
+  ret i32 0
+; CHECK-LABEL: Lxray_sled_1:
+; CHECK-NEXT:  .ascii "\005\000\000\352"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-NEXT:  .ascii "\000\360 \343"
+; CHECK-LABEL: Ltmp1:
+; CHECK-NEXT:  mov pc, lr
+}