Index: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h @@ -188,6 +188,34 @@ MCSymbol *getSymbol(const GlobalValue *GV) const; //===------------------------------------------------------------------===// + // XRay instrumentation implementation. + //===------------------------------------------------------------------===// +public: + // This describes the kind of sled we're storing in the XRay table. + enum class SledKind : uint8_t { + FUNCTION_ENTER = 0, + FUNCTION_EXIT = 1, + TAIL_CALL = 2, + }; + + // The table will contain these structs that point to the sled, the function + // containing the sled, and what kind of sled (and whether they should always + // be instrumented). + struct XRayFunctionEntry { + const MCSymbol *Sled; + const MCSymbol *Function; + SledKind Kind; + bool AlwaysInstrument; + const class Function *Fn; + }; + + // All the sleds to be emitted. + std::vector<XRayFunctionEntry> Sleds; + + // Helper function to record a given XRay sled. + void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + + //===------------------------------------------------------------------===// // MachineFunctionPass Implementation. 
//===------------------------------------------------------------------===// Index: llvm/trunk/include/llvm/Target/Target.td =================================================================== --- llvm/trunk/include/llvm/Target/Target.td +++ llvm/trunk/include/llvm/Target/Target.td @@ -956,11 +956,19 @@ def PATCHABLE_RET : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); - let AsmString = "# XRay Function Exit."; + let AsmString = "# XRay Function Patchable RET."; let usesCustomInserter = 1; let hasSideEffects = 1; let isReturn = 1; } +def PATCHABLE_FUNCTION_EXIT : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = "# XRay Function Exit."; + let usesCustomInserter = 1; + let hasSideEffects = 0; // FIXME: is this correct? + let isReturn = 0; // Original return instruction will follow +} def PATCHABLE_TAIL_CALL : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); Index: llvm/trunk/include/llvm/Target/TargetOpcodes.def =================================================================== --- llvm/trunk/include/llvm/Target/TargetOpcodes.def +++ llvm/trunk/include/llvm/Target/TargetOpcodes.def @@ -153,8 +153,25 @@ /// Wraps a return instruction and its operands to enable adding nop sleds /// either before or after the return. The nop sleds are useful for inserting /// instrumentation instructions at runtime. +/// The patch here replaces the return instruction. HANDLE_TARGET_OPCODE(PATCHABLE_RET) +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. +/// The patch here prepends the return instruction. +/// The same thing as in x86_64 is not possible for ARM because it has multiple +/// return instructions. Furthermore, CPU allows parametrized and even +/// conditional return instructions. 
In the current ARM implementation we are +/// making use of the fact that currently LLVM doesn't seem to generate +/// conditional return instructions. +/// On ARM, the same instruction can be used for popping multiple registers +/// from the stack and returning (it just pops pc register too), and LLVM +/// generates it sometimes. So we can't insert the sled between this stack +/// adjustment and the return without splitting the original instruction into 2 +/// instructions. So on ARM, rather than jumping into the exit trampoline, we +/// call it, it does the tracing, preserves the stack and returns. +HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) + /// Wraps a tail call instruction and its operands to enable adding nop sleds /// either before or after the tail exit. We use this as a disambiguation from /// PATCHABLE_RET which specifically only works for return instructions. Index: llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h +++ llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h @@ -71,6 +71,8 @@ virtual ~TargetSubtargetInfo(); + virtual bool isXRaySupported() const { return false; } + // Interfaces to the major aspects of target machine information: // // -- Instruction opcode and operand information Index: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2606,3 +2606,13 @@ AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} + +void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, + SledKind Kind) { + auto Fn = MI.getParent()->getParent()->getFunction(); + auto Attr = Fn->getFnAttribute("function-instrument"); + bool AlwaysInstrument = + Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; + 
Sleds.emplace_back( + XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn }); +} Index: llvm/trunk/lib/CodeGen/XRayInstrumentation.cpp =================================================================== --- llvm/trunk/lib/CodeGen/XRayInstrumentation.cpp +++ llvm/trunk/lib/CodeGen/XRayInstrumentation.cpp @@ -34,7 +34,74 @@ } bool runOnMachineFunction(MachineFunction &MF) override; + +private: + // Replace the original RET instruction with the exit sled code ("patchable + // ret" pseudo-instruction), so that at runtime XRay can replace the sled + // with a code jumping to XRay trampoline, which calls the tracing handler + // and, in the end, issues the RET instruction. + // This is the approach to go on CPUs which have a single RET instruction, + // like x86/x86_64. + void replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII); + // Prepend the original return instruction with the exit sled code ("patchable + // function exit" pseudo-instruction), preserving the original return + // instruction just after the exit sled code. + // This is the approach to go on CPUs which have multiple options for the + // return instruction, like ARM. For such CPUs we can't just jump into the + // XRay trampoline and issue a single return instruction there. We rather + // have to call the trampoline and return from it to the original return + // instruction of the function being instrumented. + void prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII); }; +} // anonymous namespace + +void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + // We look for *all* terminators and returns, then replace those with + // PATCHABLE_RET instructions. 
+ SmallVector<MachineInstr *, 4> Terminators; + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + unsigned Opc = 0; + if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + // Replace return instructions with: + // PATCHABLE_RET <Opcode>, <Operand>... + Opc = TargetOpcode::PATCHABLE_RET; + } + if (TII->isTailCall(T)) { + // Treat the tail call as a return instruction, which has a + // different-looking sled than the normal return case. + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)) + .addImm(T.getOpcode()); + for (auto &MO : T.operands()) + MIB.addOperand(MO); + Terminators.push_back(&T); + } + } + } + + for (auto &I : Terminators) + I->eraseFromParent(); +} + +void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + if (T.isReturn()) { + // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT)); + } + } + } } bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { @@ -54,6 +121,11 @@ return false; // Function is too small. } + if (!MF.getSubtarget().isXRaySupported()) { + //FIXME: can this be reported somehow? + return false; + } + // FIXME: Do the loop triviality analysis here or in an earlier pass. // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the @@ -64,35 +136,17 @@ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - // Then we look for *all* terminators and returns, then replace those with - // PATCHABLE_RET instructions. - SmallVector<MachineInstr *, 4> Terminators; - for (auto &MBB : MF) { - for (auto &T : MBB.terminators()) { - unsigned Opc = 0; - if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { - // Replace return instructions with: - // PATCHABLE_RET <Opcode>, <Operand>... 
- Opc = TargetOpcode::PATCHABLE_RET; - } - if (TII->isTailCall(T)) { - // Treat the tail call as a return instruction, which has a - // different-looking sled than the normal return case. - Opc = TargetOpcode::PATCHABLE_TAIL_CALL; - } - if (Opc != 0) { - auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)) - .addImm(T.getOpcode()); - for (auto &MO : T.operands()) - MIB.addOperand(MO); - Terminators.push_back(&T); - } - } + switch (MF.getTarget().getTargetTriple().getArch()) { + case Triple::ArchType::arm: + // For the architectures which don't have a single return instruction + prependRetWithPatchableExit(MF, TII); + break; + default: + // For the architectures that have a single return instruction (such as + // RETQ on x86_64). + replaceRetWithPatchableRet(MF, TII); + break; } - - for (auto &I : Terminators) - I->eraseFromParent(); - return true; } Index: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.h @@ -94,7 +94,19 @@ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + //===------------------------------------------------------------------===// + // XRay implementation + //===------------------------------------------------------------------===// +public: + // XRay-specific lowering for ARM. + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); + // Helper function that emits the XRay sleds we've collected for a particular + // function. 
+ void EmitXRayTable(); + private: + void EmitSled(const MachineInstr &MI, SledKind Kind); // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() void emitAttributes(); Index: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp @@ -150,6 +150,9 @@ // Emit the rest of the function body. EmitFunctionBody(); + // Emit the XRay table for this function. + EmitXRayTable(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. @@ -2005,6 +2008,12 @@ .addReg(0)); return; } + case ARM::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + case ARM::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(*MI); + return; } MCInst TmpInst; Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h @@ -100,6 +100,10 @@ // Return whether the target has an explicit NOP encoding. bool hasNOP() const; + virtual void getNoopForElfTarget(MCInst &NopInst) const { + getNoopForMachoTarget(NopInst); + } + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; Index: llvm/trunk/lib/Target/ARM/ARMMCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMMCInstLower.cpp +++ llvm/trunk/lib/Target/ARM/ARMMCInstLower.cpp @@ -21,6 +21,11 @@ #include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -150,3 +155,85 @@ } } } + +void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) +{ + static const int8_t NoopsInSledCount = 6; + // We want to emit the following pattern: + // + // .Lxray_sled_N: + // ALIGN + // B #20 + // ; 6 NOP instructions (24 bytes) + // .tmpN + // + // We need the 24 bytes (6 instructions) because at runtime, we'd be patching + // over the full 28 bytes (7 instructions) with the following pattern: + // + // PUSH{ r0, lr } + // MOVW r0, #<lower 16 bits of function ID> + // MOVT r0, #<higher 16 bits of function ID> + // MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit> + // MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit> + // BLX ip + // POP{ r0, lr } + // + OutStreamer->EmitCodeAlignment(4); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Emit "B #20" instruction, which jumps over the next 24 bytes (because + // register pc is 8 bytes ahead of the jump instruction by the moment CPU + // is executing it). + // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|. + // It is not clear why |addReg(0)| is needed (the last operand). 
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20) + .addImm(ARMCC::AL).addReg(0)); + + MCInst Noop; + Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + for (int8_t I = 0; I < NoopsInSledCount; I++) + { + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } + + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, Kind); +} + +void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_ENTER); +} + +void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_EXIT); +} + +void ARMAsmPrinter::EmitXRayTable() +{ + if (Sleds.empty()) + return; + if (Subtarget->isTargetELF()) { + auto *Section = OutContext.getELFSection( + "xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0, + CurrentFnSym->getName()); + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + OutStreamer->SwitchSection(Section); + for (const auto &Sled : Sleds) { + OutStreamer->EmitSymbolValue(Sled.Sled, 4); + OutStreamer->EmitSymbolValue(CurrentFnSym, 4); + auto Kind = static_cast<uint8_t>(Sled.Kind); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast<const char *>(&Kind), 1)); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1)); + OutStreamer->EmitZeros(6); + } + OutStreamer->SwitchSection(PrevSection); + } + Sleds.clear(); +} Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h @@ -540,6 +540,8 @@ } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } + virtual bool isXRaySupported() const override; + bool isAPCS_ABI() const; bool isAAPCS_ABI() const; bool isAAPCS16_ABI() const; Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp +++ 
llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp @@ -101,6 +101,11 @@ : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), TLInfo(TM, *this) {} +bool ARMSubtarget::isXRaySupported() const { + // We don't currently support Thumb, but Windows requires Thumb. + return hasV6Ops() && !isTargetWindows(); +} + void ARMSubtarget::initializeEnvironment() { // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this // directly from it, but we can try to make sure they're consistent when both Index: llvm/trunk/lib/Target/X86/X86AsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/X86/X86AsmPrinter.h +++ llvm/trunk/lib/Target/X86/X86AsmPrinter.h @@ -71,27 +71,6 @@ StackMapShadowTracker SMShadowTracker; - // This describes the kind of sled we're storing in the XRay table. - enum class SledKind : uint8_t { - FUNCTION_ENTER = 0, - FUNCTION_EXIT = 1, - TAIL_CALL = 2, - }; - - // The table will contain these structs that point to the sled, the function - // containing the sled, and what kind of sled (and whether they should always - // be instrumented). - struct XRayFunctionEntry { - const MCSymbol *Sled; - const MCSymbol *Function; - SledKind Kind; - bool AlwaysInstrument; - const class Function *Fn; - }; - - // All the sleds to be emitted. - std::vector<XRayFunctionEntry> Sleds; - // All instructions emitted by the X86AsmPrinter should use this helper // method. // @@ -117,8 +96,6 @@ // function. void EmitXRayTable(); - // Helper function to record a given XRay sled. 
- void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); public: explicit X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp +++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp @@ -1022,16 +1022,6 @@ getSubtargetInfo()); } -void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, - SledKind Kind) { - auto Fn = MI.getParent()->getParent()->getFunction(); - auto Attr = Fn->getFnAttribute("function-instrument"); - bool AlwaysInstrument = - Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; - Sleds.emplace_back( - XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn}); -} - void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, X86MCInstLower &MCIL) { // We want to emit the following pattern: Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -460,6 +460,8 @@ bool hasPKU() const { return HasPKU; } bool hasMPX() const { return HasMPX; } + virtual bool isXRaySupported() const override { return is64Bit(); } + bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } Index: llvm/trunk/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll +++ llvm/trunk/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll @@ -0,0 +1,24 @@ +; RUN: llc -filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s + +define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK-LABEL: Lxray_sled_0: 
+; CHECK-NEXT: b #20 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-LABEL: Ltmp0: + ret i32 0 +; CHECK-LABEL: Lxray_sled_1: +; CHECK-NEXT: b #20 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-NEXT: mov r0, r0 +; CHECK-LABEL: Ltmp1: +; CHECK-NEXT: bx lr +} Index: llvm/trunk/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll +++ llvm/trunk/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll @@ -0,0 +1,24 @@ +; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s + +define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { +; CHECK-LABEL: Lxray_sled_0: +; CHECK-NEXT: b #20 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-LABEL: Ltmp0: + ret i32 0 +; CHECK-LABEL: Lxray_sled_1: +; CHECK-NEXT: b #20 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-LABEL: Ltmp1: +; CHECK-NEXT: bx lr +}