Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -235,13 +235,15 @@ // The table will contain these structs that point to the sled, the function // containing the sled, and what kind of sled (and whether they should always - // be instrumented). + // be instrumented). We also use a version identifier that the runtime can use + // to decide what to do with the sled, depending on the version of the sled. struct XRayFunctionEntry { const MCSymbol *Sled; const MCSymbol *Function; SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + uint8_t Version; void emit(int, MCStreamer *, const MCSymbol *) const; }; @@ -250,7 +252,8 @@ SmallVector Sleds; // Helper function to record a given XRay sled. - void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, + uint8_t Version = 0); /// Emit a table with all XRay instrumentation points. void emitXRayTable(); Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2767,10 +2767,23 @@ Out->EmitSymbolValue(Sled, Bytes); Out->EmitSymbolValue(CurrentFnSym, Bytes); auto Kind8 = static_cast(Kind); - Out->EmitBytes(StringRef(reinterpret_cast(&Kind8), 1)); - Out->EmitBytes( + Out->EmitBinaryData(StringRef(reinterpret_cast(&Kind8), 1)); + Out->EmitBinaryData( StringRef(reinterpret_cast(&AlwaysInstrument), 1)); - Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries + Out->EmitBinaryData(StringRef(reinterpret_cast(&Version), 1)); + auto Padding = (4 * Bytes) - ((2 * Bytes) + 3); + assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size"); + Out->EmitZeros(Padding); +} + +void markHiddenAndLocal(MCStreamer &OS, MCSymbol *Sym, + const TargetSubtargetInfo &ST) { + // TODO: Support other platforms where we can do the same for the symbol. + if (ST.getTargetTriple().isOSBinFormatELF()) { + OS.EmitSymbolAttribute(Sym, MCSymbolAttr::MCSA_Hidden); + OS.EmitSymbolAttribute(Sym, MCSymbolAttr::MCSA_Local); + OS.EmitSymbolAttribute(Sym, MCSymbolAttr::MCSA_ELF_TypeNoType); + } } void AsmPrinter::emitXRayTable() { @@ -2782,19 +2795,19 @@ MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + auto Associated = dyn_cast(PrevSection->getBeginSymbol()); + assert(Associated != nullptr); + auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + std::string GroupName; if (Fn->hasComdat()) { - InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); + Flags |= ELF::SHF_GROUP; + GroupName = Fn->getComdat()->getName(); } + + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + Flags, 0, GroupName, ~0U, Associated); + FnSledIndex = OutContext.getELFSection( + "xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, GroupName, ~0U, Associated); } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); @@ -2810,9 +2823,6 @@ // function is always called just before the function's end, we assume that // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the @@ -2820,26 +2830,29 @@ MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->EmitLabel(SledsStart); + markHiddenAndLocal(*OutStreamer, SledsStart, MF->getSubtarget()); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->EmitLabel(SledsEnd); + markHiddenAndLocal(*OutStreamer, SledsEnd, MF->getSubtarget()); // We then emit a single entry in the index per function. We use the symbols // that bound the instrumentation map as the range for a specific function. // Each entry here will be 2 * word size aligned, as we're writing down two - // pointers. This should work for both 32-bit and 64-bit platforms. + // pointers. This should work for both 32-bit and 64-bit platforms. The index + // label/symbol we define we mark as "hidden" so they get removed by the + // linker when linking. OutStreamer->SwitchSection(FnSledIndex); OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); - OutStreamer->EmitLabel(IdxRef); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes); - OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false); OutStreamer->SwitchSection(PrevSection); Sleds.clear(); } void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, - SledKind Kind) { + SledKind Kind, uint8_t Version) { auto Fn = MI.getParent()->getParent()->getFunction(); auto Attr = Fn->getFnAttribute("function-instrument"); bool LogArgs = Fn->hasFnAttribute("xray-log-args"); @@ -2847,8 +2860,8 @@ Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; if (Kind == SledKind::FUNCTION_ENTER && LogArgs) Kind = SledKind::LOG_ARGS_ENTER; - Sleds.emplace_back( - XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn }); + Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind, + AlwaysInstrument, Fn, Version}); } uint16_t AsmPrinter::getDwarfVersion() const { Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1047,20 +1047,20 @@ // We want to emit the following pattern, which follows the x86 calling // convention to prepare for the trampoline call to be patched in. // - // // .p2align 1, ... // .Lxray_event_sled_N: - // jmp +N // jump across the call instruction - // callq __xray_CustomEvent // force relocation to symbol - // - // - // The relative jump needs to jump forward 24 bytes: - // 10 (args) + 5 (nops) + 9 (cleanup) + // jmp +N // jump across the instrumentation sled + // ... // set up arguments in register + // callq __xray_CustomEvent // force dependency to symbol + // ... + // // // After patching, it would look something like: // // nopw (2-byte nop) + // ... // callq __xrayCustomEvent // already lowered + // ... // // --- // First we emit the label and the jump. @@ -1068,53 +1068,73 @@ OutStreamer->AddComment("# XRay Custom Event Log"); OutStreamer->EmitCodeAlignment(2); OutStreamer->EmitLabel(CurSled); + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Hidden); + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Local); + OutStreamer->EmitSymbolAttribute(CurSled, + MCSymbolAttr::MCSA_ELF_TypeNoType); + } // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. - OutStreamer->EmitBytes("\xeb\x14"); + OutStreamer->EmitBytes("\xeb\x0f"); // The default C calling convention will place two arguments into %rcx and // %rdx -- so we only work with those. - unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX}; - - // Because we will use %rax, we preserve that across the call. - EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX)); - - // Then we put the operands in the %rdi and %rsi registers. + unsigned UsedRegs[] = {X86::RDI, X86::RSI}; + bool UsedMask[] = {false, false}; + + // Then we put the operands in the %rdi and %rsi registers. We spill the + // values in the register before we clobber them, and mark them as used in + // UsedMask. In case the arguments are already in the correct register, we use + // emit nops appropriately sized to keep the sled the same size in every + // situation. for (unsigned I = 0; I < MI.getNumOperands(); ++I) if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { - if (Op->isImm()) + if (Op->isImm()) { + UsedMask[I] = true; + EmitAndCountInstruction( + MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I])); EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri) .addReg(UsedRegs[I]) .addImm(Op->getImm())); - else if (Op->isReg()) { - if (Op->getReg() != UsedRegs[I]) + } else if (Op->isReg()) { + if (Op->getReg() != UsedRegs[I]) { + UsedMask[I] = true; + EmitAndCountInstruction( + MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I])); EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) .addReg(UsedRegs[I]) .addReg(Op->getReg())); - else - EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo()); + } else { + EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); + } } } // We emit a hard dependency on the __xray_CustomEvent symbol, which is the - // name of the trampoline to be implemented by the XRay runtime. We put this - // explicitly in the %rax register. + // name of the trampoline to be implemented by the XRay runtime. auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); - EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri) - .addReg(X86::RAX) - .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); // Emit the call instruction. - EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX)); + EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) + .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); // Restore caller-saved and used registers. + for (unsigned I = sizeof UsedMask; I-- > 0;) + if (UsedMask[I]) + EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I])); + else + EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); + OutStreamer->AddComment("xray custom event end."); - EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX)); - recordSled(CurSled, MI, SledKind::CUSTOM_EVENT); + // Record the sled version. Older versions of this sled were spelled + // differently, so we let the runtime handle the different offsets we're + // using. + recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); } void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, @@ -1125,7 +1145,6 @@ // .Lxray_sled_N: // jmp .tmpN // # 9 bytes worth of noops - // .tmpN // // We need the 9 bytes because at runtime, we'd be patching over the full 11 // bytes with the following pattern: @@ -1136,14 +1155,18 @@ auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->EmitCodeAlignment(2); OutStreamer->EmitLabel(CurSled); - auto Target = OutContext.createTempSymbol(); + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Hidden); + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Local); + OutStreamer->EmitSymbolAttribute(CurSled, + MCSymbolAttr::MCSA_ELF_TypeNoType); + } // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. OutStreamer->EmitBytes("\xeb\x09"); EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); - OutStreamer->EmitLabel(Target); recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); } @@ -1166,6 +1189,12 @@ auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->EmitCodeAlignment(2); OutStreamer->EmitLabel(CurSled); + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Hidden); + OutStreamer->EmitSymbolAttribute(CurSled, MCSymbolAttr::MCSA_Local); + OutStreamer->EmitSymbolAttribute(CurSled, + MCSymbolAttr::MCSA_ELF_TypeNoType); + } unsigned OpCode = MI.getOperand(0).getImm(); MCInst Ret; Ret.setOpcode(OpCode); Index: test/CodeGen/X86/xray-attribute-instrumentation.ll =================================================================== --- test/CodeGen/X86/xray-attribute-instrumentation.ll +++ test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -1,4 +1,6 @@ ; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu \ +; RUN: -relocation-model=pic < %s | FileCheck %s ; RUN: llc -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {