Index: lib/CodeGen/XRayInstrumentation.cpp =================================================================== --- lib/CodeGen/XRayInstrumentation.cpp +++ lib/CodeGen/XRayInstrumentation.cpp @@ -148,6 +148,7 @@ switch (MF.getTarget().getTargetTriple().getArch()) { case Triple::ArchType::arm: case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: // For the architectures which don't have a single return instruction prependRetWithPatchableExit(MF, TII); break; Index: lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- lib/Target/AArch64/AArch64AsmPrinter.cpp +++ lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -68,6 +70,14 @@ const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); + + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + + void EmitXRayTable(); + void EmitSled(const MachineInstr &MI, SledKind Kind); + /// \brief tblgen'erated driver function for lowering simple MI->MC /// pseudo instructions. bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, @@ -83,7 +93,9 @@ bool runOnMachineFunction(MachineFunction &F) override { AArch64FI = F.getInfo(); STI = static_cast(&F.getSubtarget()); - return AsmPrinter::runOnMachineFunction(F); + bool Result = AsmPrinter::runOnMachineFunction(F); + EmitXRayTable(); + return Result; } private: @@ -122,6 +134,104 @@ //===----------------------------------------------------------------------===// +void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_ENTER); +} + +void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_EXIT); +} + +void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::TAIL_CALL); +} + +void AArch64AsmPrinter::EmitXRayTable() +{ + //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid + // code duplication as it is now for x86_64, ARM32 and AArch64. + if (Sleds.empty()) + return; + if (STI->isTargetELF()) { + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section; + if (Fn->hasComdat()) + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + else + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since EmitXRayTable() is always called just + // before the function's end, we assume that this is happening after the + // last return instruction. + // + // We then align the reference to 16 byte boundaries, which we determined + // experimentally to be beneficial to avoid causing decoder stalls. + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, 8, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) { + OutStreamer->EmitSymbolValue(Sled.Sled, 8); + OutStreamer->EmitSymbolValue(CurrentFnSym, 8); + auto Kind = static_cast(Sled.Kind); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast(&Kind), 1)); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); + OutStreamer->EmitZeros(14); + } + OutStreamer->SwitchSection(PrevSection); + } + Sleds.clear(); +} + +void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) +{ + static const int8_t NoopsInSledCount = 7; + // We want to emit the following pattern: + // + // .Lxray_sled_N: + // ALIGN + // B #32 + // ; 7 NOP instructions (28 bytes) + // .tmpN + // + // We need the 28 bytes (7 instructions) because at runtime, we'd be patching + // over the full 32 bytes (8 instructions) with the following pattern: + // + // STP X0, X30, [SP, #-16]! ; push X0 and the link register to the stack + // LDR W0, #12 ; W0 := function ID + // LDR X16,#12 ; X16 := addr of __xray_FunctionEntry or __xray_FunctionExit + // BLR X16 ; call the tracing trampoline + // ;DATA: 32 bits of function ID + // ;DATA: lower 32 bits of the address of the trampoline + // ;DATA: higher 32 bits of the address of the trampoline + // LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack + // + OutStreamer->EmitCodeAlignment(4); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Emit "B #32" instruction, which jumps over the next 28 bytes. + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::B).addImm(32)); + + for (int8_t I = 0; I < NoopsInSledCount; I++) + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, Kind); +} + void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { const Triple &TT = TM.getTargetTriple(); if (TT.isOSBinFormatMachO()) { @@ -567,6 +677,18 @@ case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(*OutStreamer, SM, *MI); + + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(*MI); + return; + + case TargetOpcode::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(*MI); + return; } // Finally, do the automated lowerings for everything else. Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -173,6 +173,8 @@ bool requiresStrictAlign() const { return StrictAlign; } + bool isXRaySupported() const override { return true; } + bool isX18Reserved() const { return ReserveX18; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; }