diff --git a/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h @@ -0,0 +1,65 @@ +//===----- PerfSupportPlugin.h ----- Utils for perf support -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with perf +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H +#define LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H + +#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" + +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" + +namespace llvm { +namespace orc { + +/// Log perf jitdump events for each object (see +/// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt). +/// Currently has support for dumping code load records and unwind info records. 
+class PerfSupportPlugin : public ObjectLinkingLayer::Plugin { +public: + PerfSupportPlugin(ExecutorProcessControl &EPC, + ExecutorAddr RegisterPerfStartAddr, + ExecutorAddr RegisterPerfEndAddr, + ExecutorAddr RegisterPerfImplAddr, bool EmitUnwindInfo); + ~PerfSupportPlugin(); + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) override; + + Error notifyFailed(MaterializationResponsibility &MR) override { + return Error::success(); + } + + Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override { + return Error::success(); + } + + void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) override {} + + static Expected> + Create(ExecutorProcessControl &EPC, JITDylib &JD, bool EmitUnwindInfo); + +private: + ExecutorProcessControl &EPC; + ExecutorAddr RegisterPerfStartAddr; + ExecutorAddr RegisterPerfEndAddr; + ExecutorAddr RegisterPerfImplAddr; + std::atomic CodeIndex; + bool EmitUnwindInfo; +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H \ No newline at end of file diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h @@ -0,0 +1,233 @@ +//===--- PerfSharedStructs.h --- RPC Structs for perf support ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Structs and serialization to share perf-related information +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_PERFSHAREDSTRUCTS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_PERFSHAREDSTRUCTS_H + +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" + +namespace llvm { + +namespace orc { + +// The following are POD struct definitions from the perf jit specification + +enum class PerfJITRecordType { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, // not emitted, code isn't moved + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, // not emitted, unnecessary + JIT_CODE_UNWINDING_INFO = 4, // not emitted + + JIT_CODE_MAX +}; + +struct PerfJITRecordPrefix { + PerfJITRecordType Id; // record type identifier, uint32_t + uint32_t TotalSize; +}; +struct PerfJITCodeLoadRecord { + PerfJITRecordPrefix Prefix; + + uint32_t Pid; + uint32_t Tid; + uint64_t Vma; + uint64_t CodeAddr; + uint64_t CodeSize; + uint64_t CodeIndex; + std::string Name; +}; + +struct PerfJITDebugEntry { + uint64_t Addr; + uint32_t Lineno; // source line number starting at 1 + uint32_t Discrim; // column discriminator, 0 is default + std::string Name; +}; + +struct PerfJITDebugInfoRecord { + PerfJITRecordPrefix Prefix; + + uint64_t CodeAddr; + std::vector Entries; +}; + +struct PerfJITCodeUnwindingInfoRecord { + PerfJITRecordPrefix Prefix; + + uint64_t UnwindDataSize; + uint64_t EHFrameHdrSize; + uint64_t MappedSize; + // Union, one will always be 0/"", the other has data + uint64_t EHFrameHdrAddr; + std::string EHFrameHdr; + + uint64_t EHFrameAddr; + // size is UnwindDataSize - EHFrameHdrSize +}; + +// Batch vehicle for minimizing RPC calls for perf jit records +struct PerfJITRecordBatch { + std::vector DebugInfoRecords; + std::vector CodeLoadRecords; + // only valid if 
record size > 0 + PerfJITCodeUnwindingInfoRecord UnwindingRecord; +}; + +// SPS traits for Records + +namespace shared { + +using SPSPerfJITRecordPrefix = SPSTuple; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const PerfJITRecordPrefix &Val) { + return SPSPerfJITRecordPrefix::AsArgList::size( + static_cast(Val.Id), Val.TotalSize); + } + static bool deserialize(SPSInputBuffer &IB, PerfJITRecordPrefix &Val) { + uint32_t Id; + if (!SPSPerfJITRecordPrefix::AsArgList::deserialize(IB, Id, Val.TotalSize)) + return false; + Val.Id = static_cast(Id); + return true; + } + static bool serialize(SPSOutputBuffer &OB, const PerfJITRecordPrefix &Val) { + return SPSPerfJITRecordPrefix::AsArgList::serialize( + OB, static_cast(Val.Id), Val.TotalSize); + } +}; + +using SPSPerfJITCodeLoadRecord = + SPSTuple; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const PerfJITCodeLoadRecord &Val) { + return SPSPerfJITCodeLoadRecord::AsArgList::size( + Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize, + Val.CodeIndex, Val.Name); + } + + static bool deserialize(SPSInputBuffer &IB, PerfJITCodeLoadRecord &Val) { + return SPSPerfJITCodeLoadRecord::AsArgList::deserialize( + IB, Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize, + Val.CodeIndex, Val.Name); + } + + static bool serialize(SPSOutputBuffer &OB, const PerfJITCodeLoadRecord &Val) { + return SPSPerfJITCodeLoadRecord::AsArgList::serialize( + OB, Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize, + Val.CodeIndex, Val.Name); + } +}; + +using SPSPerfJITDebugEntry = SPSTuple; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const PerfJITDebugEntry &Val) { + return SPSPerfJITDebugEntry::AsArgList::size(Val.Addr, Val.Lineno, + Val.Discrim, Val.Name); + } + + static bool deserialize(SPSInputBuffer &IB, PerfJITDebugEntry &Val) { + return SPSPerfJITDebugEntry::AsArgList::deserialize( + IB, Val.Addr, Val.Lineno, 
Val.Discrim, Val.Name); + } + + static bool serialize(SPSOutputBuffer &OB, const PerfJITDebugEntry &Val) { + return SPSPerfJITDebugEntry::AsArgList::serialize(OB, Val.Addr, Val.Lineno, + Val.Discrim, Val.Name); + } +}; + +using SPSPerfJITDebugInfoRecord = SPSTuple>; + +template <> +class SPSSerializationTraits { +public: + static size_t size(const PerfJITDebugInfoRecord &Val) { + return SPSPerfJITDebugInfoRecord::AsArgList::size(Val.Prefix, Val.CodeAddr, + Val.Entries); + } + static bool deserialize(SPSInputBuffer &IB, PerfJITDebugInfoRecord &Val) { + return SPSPerfJITDebugInfoRecord::AsArgList::deserialize( + IB, Val.Prefix, Val.CodeAddr, Val.Entries); + } + static bool serialize(SPSOutputBuffer &OB, + const PerfJITDebugInfoRecord &Val) { + return SPSPerfJITDebugInfoRecord::AsArgList::serialize( + OB, Val.Prefix, Val.CodeAddr, Val.Entries); + } +}; + +using SPSPerfJITCodeUnwindingInfoRecord = + SPSTuple; +template <> +class SPSSerializationTraits { +public: + static size_t size(const PerfJITCodeUnwindingInfoRecord &Val) { + return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::size( + Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize, + Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr); + } + static bool deserialize(SPSInputBuffer &IB, + PerfJITCodeUnwindingInfoRecord &Val) { + return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::deserialize( + IB, Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize, + Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr); + } + static bool serialize(SPSOutputBuffer &OB, + const PerfJITCodeUnwindingInfoRecord &Val) { + return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::serialize( + OB, Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize, + Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr); + } +}; + +using SPSPerfJITRecordBatch = SPSTuple, + SPSSequence, + SPSPerfJITCodeUnwindingInfoRecord>; +template <> +class SPSSerializationTraits { +public: + static size_t size(const 
PerfJITRecordBatch &Val) { + return SPSPerfJITRecordBatch::AsArgList::size( + Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord); + } + static bool deserialize(SPSInputBuffer &IB, PerfJITRecordBatch &Val) { + return SPSPerfJITRecordBatch::AsArgList::deserialize( + IB, Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord); + } + static bool serialize(SPSOutputBuffer &OB, const PerfJITRecordBatch &Val) { + return SPSPerfJITRecordBatch::AsArgList::serialize( + OB, Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord); + } +}; + +} // namespace shared + +} // namespace orc + +} // namespace llvm + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h @@ -0,0 +1,28 @@ +//===------- JITLoaderPerf.h --- Register profiler objects ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the perf JIT interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H + +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size); + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H \ No newline at end of file diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -41,6 +41,7 @@ ObjectTransformLayer.cpp OrcABISupport.cpp OrcV2CBindings.cpp + PerfSupportPlugin.cpp RTDyldObjectLinkingLayer.cpp SimpleRemoteEPC.cpp Speculation.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp @@ -0,0 +1,297 @@ +//===----- PerfSupportPlugin.cpp --- Utils for perf support -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with perf +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h" + +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" + +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" +#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h" + +#define DEBUG_TYPE "orc" + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::jitlink; + +namespace { + +// Creates an EH frame header prepared for a 32-bit relative relocation +// to the start of the .eh_frame section. Absolute injects a 64-bit absolute +// address space offset 4 bytes from the start instead of 4 bytes +Expected createX64EHFrameHeader(Section &EHFrame, + support::endianness endianness, + bool absolute) { + uint8_t Version = 1; + uint8_t EhFramePtrEnc = 0; + if (absolute) { + EhFramePtrEnc |= dwarf::DW_EH_PE_sdata8 | dwarf::DW_EH_PE_absptr; + } else { + EhFramePtrEnc |= dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_datarel; + } + uint8_t FDECountEnc = dwarf::DW_EH_PE_omit; + uint8_t TableEnc = dwarf::DW_EH_PE_omit; + // X86_64_64 relocation to the start of the .eh_frame section + uint32_t EHFrameRelocation = 0; + // uint32_t FDECount = 0; + // Skip the FDE binary search table + // We'd have to reprocess the CIEs to get this information, + // which seems like more trouble than it's worth + // TODO consider implementing this. + // binary search table goes here + + size_t HeaderSize = + (sizeof(Version) + sizeof(EhFramePtrEnc) + sizeof(FDECountEnc) + + sizeof(TableEnc) + + (absolute ? 
sizeof(uint64_t) : sizeof(EHFrameRelocation))); + std::string HeaderContent(HeaderSize, '\0'); + BinaryStreamWriter Writer( + MutableArrayRef( + reinterpret_cast(HeaderContent.data()), HeaderSize), + endianness); + if (auto Err = Writer.writeInteger(Version)) + return std::move(Err); + if (auto Err = Writer.writeInteger(EhFramePtrEnc)) + return std::move(Err); + if (auto Err = Writer.writeInteger(FDECountEnc)) + return std::move(Err); + if (auto Err = Writer.writeInteger(TableEnc)) + return std::move(Err); + if (absolute) { + uint64_t EHFrameAddr = SectionRange(EHFrame).getStart().getValue(); + if (auto Err = Writer.writeInteger(EHFrameAddr)) + return std::move(Err); + } else { + if (auto Err = Writer.writeInteger(EHFrameRelocation)) + return std::move(Err); + } + return HeaderContent; +} + +constexpr StringRef RegisterPerfStartSymbolName = + "llvm_orc_registerJITLoaderPerfStart"; +constexpr StringRef RegisterPerfEndSymbolName = + "llvm_orc_registerJITLoaderPerfEnd"; +constexpr StringRef RegisterPerfImplSymbolName = + "llvm_orc_registerJITLoaderPerfImpl"; + +static PerfJITCodeLoadRecord +getCodeLoadRecord(const Symbol &Sym, std::atomic &CodeIndex) { + PerfJITCodeLoadRecord Record; + auto Name = Sym.getName(); + auto Addr = Sym.getAddress(); + auto Size = Sym.getSize(); + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_LOAD; + // Runtime sets PID + Record.Pid = 0; + // Runtime sets TID + Record.Tid = 0; + Record.Vma = Addr.getValue(); + Record.CodeAddr = Addr.getValue(); + Record.CodeSize = Size; + Record.CodeIndex = CodeIndex++; + Record.Name = Name.str(); + // Initialize last, once all the other fields are filled + Record.Prefix.TotalSize = + (2 * sizeof(uint32_t) // id, total_size + + sizeof(uint64_t) // timestamp + + 2 * sizeof(uint32_t) // pid, tid + + 4 * sizeof(uint64_t) // vma, code_addr, code_size, code_index + + Name.size() + 1 // symbol name + + Record.CodeSize // code + ); + return Record; +} + +static std::optional +getDebugInfoRecord(const Symbol &Sym, 
DWARFContext *DC) { + if (!DC) { + LLVM_DEBUG(dbgs() << "No debug info available\n"); + return std::nullopt; + } + auto &Section = Sym.getBlock().getSection(); + auto Addr = Sym.getAddress(); + auto Size = Sym.getSize(); + auto SAddr = object::SectionedAddress{Addr.getValue(), Section.getOrdinal()}; + LLVM_DEBUG(dbgs() << "Getting debug info for symbol " << Sym.getName() + << " at address " << Addr.getValue() << " with size " + << Size << "\n" + << "Section ordinal: " << Section.getOrdinal() << "\n"); + auto LInfo = DC->getLineInfoForAddressRange( + SAddr, Size, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); + if (LInfo.empty()) { + // No line info available + LLVM_DEBUG(dbgs() << "No line info available\n"); + return std::nullopt; + } + PerfJITDebugInfoRecord Record; + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_DEBUG_INFO; + Record.CodeAddr = Addr.getValue(); + for (const auto &Entry : LInfo) { + auto Addr = Entry.first; + // The function re-created by perf is preceded by a elf + // header. Need to adjust for that, otherwise the results are + // wrong. 
+ Addr += 0x40; + Record.Entries.push_back({Addr, Entry.second.Line, + Entry.second.Discriminator, + Entry.second.FileName}); + } + size_t EntriesBytes = (2 // record header + + 2 // record fields + ) * + sizeof(uint64_t); + for (const auto &Entry : Record.Entries) { + EntriesBytes += + sizeof(uint64_t) + 2 * sizeof(uint32_t); // Addr, Line/Discrim + EntriesBytes += Entry.Name.size() + 1; // Name + } + Record.Prefix.TotalSize = EntriesBytes; + LLVM_DEBUG(dbgs() << "Created debug info record\n" + << "Total size: " << Record.Prefix.TotalSize << "\n" + << "Nr entries: " << Record.Entries.size() << "\n"); + return Record; +} + +static Expected +getUnwindingRecord(LinkGraph &G) { + PerfJITCodeUnwindingInfoRecord Record; + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_UNWINDING_INFO; + Record.Prefix.TotalSize = 0; + auto Eh_frame = G.findSectionByName(".eh_frame"); + if (!Eh_frame) { + LLVM_DEBUG(dbgs() << "No .eh_frame section found\n"); + return Record; + } + if (!G.getTargetTriple().isOSBinFormatELF()) { + LLVM_DEBUG(dbgs() << "Not an ELF file, will not emit unwinding info\n"); + return Record; + } + auto SR = SectionRange(*Eh_frame); + auto EHFrameSize = SR.getSize(); + auto Eh_frame_hdr = G.findSectionByName(".eh_frame_hdr"); + if (!Eh_frame_hdr) { + if (G.getTargetTriple().getArch() == Triple::x86_64) { + auto Hdr = createX64EHFrameHeader(*Eh_frame, G.getEndianness(), true); + if (!Hdr) + return Hdr.takeError(); + Record.EHFrameHdr = std::move(*Hdr); + } else { + LLVM_DEBUG(dbgs() << "No .eh_frame_hdr section found\n"); + return Record; + } + Record.EHFrameHdrAddr = 0; + Record.EHFrameHdrSize = Record.EHFrameHdr.size(); + Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize; + Record.MappedSize = 0; // Because the EHFrame header was not mapped + } else { + auto SR = SectionRange(*Eh_frame_hdr); + Record.EHFrameHdrAddr = SR.getStart().getValue(); + Record.EHFrameHdrSize = SR.getSize(); + Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize; + 
Record.MappedSize = Record.UnwindDataSize; + } + Record.EHFrameAddr = SR.getStart().getValue(); + Record.Prefix.TotalSize = + (2 * sizeof(uint32_t) // id, total_size + + sizeof(uint64_t) // timestamp + + + 3 * sizeof(uint64_t) // unwind_data_size, eh_frame_hdr_size, mapped_size + + Record.UnwindDataSize // eh_frame_hdr, eh_frame + ); + LLVM_DEBUG(dbgs() << "Created unwind record\n" + << "Total size: " << Record.Prefix.TotalSize << "\n" + << "Unwind size: " << Record.UnwindDataSize << "\n" + << "EHFrame size: " << EHFrameSize << "\n" + << "EHFrameHdr size: " << Record.EHFrameHdrSize << "\n"); + return Record; +} + +static PerfJITRecordBatch getRecords(ExecutionSession &ES, LinkGraph &G, + DWARFContext *DC, + std::atomic &CodeIndex, + bool EmitUnwindInfo) { + PerfJITRecordBatch Batch; + for (auto Sym : G.defined_symbols()) { + if (!Sym->hasName() || !Sym->isCallable()) + continue; + auto DebugInfo = getDebugInfoRecord(*Sym, DC); + if (DebugInfo) + Batch.DebugInfoRecords.push_back(std::move(*DebugInfo)); + Batch.CodeLoadRecords.push_back(getCodeLoadRecord(*Sym, CodeIndex)); + } + if (EmitUnwindInfo) { + auto UWR = getUnwindingRecord(G); + if (!UWR) { + ES.reportError(UWR.takeError()); + } else { + Batch.UnwindingRecord = std::move(*UWR); + } + } else { + Batch.UnwindingRecord.Prefix.TotalSize = 0; + } + return Batch; +} +} // namespace + +PerfSupportPlugin::PerfSupportPlugin(ExecutorProcessControl &EPC, + ExecutorAddr RegisterPerfStartAddr, + ExecutorAddr RegisterPerfEndAddr, + ExecutorAddr RegisterPerfImplAddr, + bool EmitUnwindInfo) + : EPC(EPC), RegisterPerfStartAddr(RegisterPerfStartAddr), + RegisterPerfEndAddr(RegisterPerfEndAddr), + RegisterPerfImplAddr(RegisterPerfImplAddr), CodeIndex(0), + EmitUnwindInfo(EmitUnwindInfo) { + cantFail(EPC.callSPSWrapper(RegisterPerfStartAddr)); +} +PerfSupportPlugin::~PerfSupportPlugin() { + cantFail(EPC.callSPSWrapper(RegisterPerfEndAddr)); +} + +void PerfSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR, + 
LinkGraph &G, + PassConfiguration &Config) { + Config.PostFixupPasses.push_back([this](LinkGraph &G) { + // TODO get an actual DWARFContext for line info + DWARFContext *DWC = nullptr; + auto Batch = getRecords(EPC.getExecutionSession(), G, DWC, CodeIndex, + EmitUnwindInfo); + G.allocActions().push_back( + {cantFail(shared::WrapperFunctionCall::Create< + shared::SPSArgList>( + RegisterPerfImplAddr, Batch)), + {}}); + return Error::success(); + }); +} + +Expected> +PerfSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD, + bool EmitUnwindInfo) { + if (!EPC.getTargetTriple().isOSBinFormatELF()) { + return make_error( + "Perf support only available for ELF LinkGraphs!", + inconvertibleErrorCode()); + } + auto &ES = EPC.getExecutionSession(); + ExecutorAddr StartAddr, EndAddr, ImplAddr; + if (auto Err = lookupAndRecordAddrs( + ES, LookupKind::Static, makeJITDylibSearchOrder({&JD}), + {{ES.intern(RegisterPerfStartSymbolName), &StartAddr}, + {ES.intern(RegisterPerfEndSymbolName), &EndAddr}, + {ES.intern(RegisterPerfImplSymbolName), &ImplAddr}})) + return std::move(Err); + return std::make_unique(EPC, StartAddr, EndAddr, ImplAddr, + EmitUnwindInfo); +} diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMOrcTargetProcess ExecutorSharedMemoryMapperService.cpp JITLoaderGDB.cpp + JITLoaderPerf.cpp OrcRTBootstrap.cpp RegisterEHFrames.cpp SimpleExecutorDylibManager.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp @@ -0,0 +1,457 @@ +//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// +// +// Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+
+#include <mutex>
+#include <optional>
+
+#ifdef __linux__
+
+#include <sys/mman.h> // mmap()
+#include <time.h>     // clock_gettime(), time(), localtime_r() */
+#include <unistd.h>   // for read(), close()
+
+#define DEBUG_TYPE "orc"
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR" +#define LLVM_PERF_JIT_MAGIC \ + ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ + (uint32_t)'D') +#define LLVM_PERF_JIT_VERSION 1 + +using namespace llvm; +using namespace llvm::orc; + +struct PerfState { + // cache lookups + uint32_t Pid; + + // base directory for output data + std::string JitPath; + + // output data stream, closed via Dumpstream + int DumpFd = -1; + + // output data stream + std::unique_ptr Dumpstream; + + // perf mmap marker + void *MarkerAddr = NULL; +}; + +// prevent concurrent dumps from messing up the output file +static std::mutex Mutex; +static std::optional State; + +struct RecHeader { + uint32_t Id; + uint32_t TotalSize; + uint64_t Timestamp; +}; + +struct DIR { + RecHeader Prefix; + uint64_t CodeAddr; + uint64_t NrEntry; +}; + +struct DIE { + uint64_t CodeAddr; + uint32_t Line; + uint32_t Discrim; +}; + +struct CLR { + RecHeader Prefix; + uint32_t Pid; + uint32_t Tid; + uint64_t Vma; + uint64_t CodeAddr; + uint64_t CodeSize; + uint64_t CodeIndex; +}; + +struct UWR { + RecHeader Prefix; + uint64_t UnwindDataSize; + uint64_t EhFrameHeaderSize; + uint64_t MappedSize; +}; + +static inline uint64_t timespec_to_ns(const struct timespec *TS) { + const uint64_t NanoSecPerSec = 1000000000; + return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec; +} + +static inline uint64_t perf_get_timestamp() { + timespec TS; + if (clock_gettime(CLOCK_MONOTONIC, &TS)) + return 0; + + return timespec_to_ns(&TS); +} + +static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { + assert(State && "PerfState not initialized"); + LLVM_DEBUG(dbgs() << "Writing debug record with " + << DebugRecord.Entries.size() << " entries\n"); + size_t Written = 0; + DIR Dir{RecHeader{static_cast(DebugRecord.Prefix.Id), + DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, + DebugRecord.CodeAddr, DebugRecord.Entries.size()}; + State->Dumpstream->write(reinterpret_cast(&Dir), sizeof(Dir)); + Written += 
sizeof(Dir); + for (auto &Die : DebugRecord.Entries) { + DIE d{Die.Addr, Die.Lineno, Die.Discrim}; + State->Dumpstream->write(reinterpret_cast(&d), sizeof(d)); + State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1); + Written += sizeof(d) + Die.Name.size() + 1; + } + LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); +} + +static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { + assert(State && "PerfState not initialized"); + uint32_t Tid = get_threadid(); + LLVM_DEBUG(dbgs() << "Writing code record with code size " + << CodeRecord.CodeSize << " and code index " + << CodeRecord.CodeIndex << "\n"); + CLR Clr{RecHeader{static_cast(CodeRecord.Prefix.Id), + CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, + State->Pid, + Tid, + CodeRecord.Vma, + CodeRecord.CodeAddr, + CodeRecord.CodeSize, + CodeRecord.CodeIndex}; + LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, " + << CodeRecord.Name.size() + 1 << " bytes of name, " + << CodeRecord.CodeSize << " bytes of code\n"); + State->Dumpstream->write(reinterpret_cast(&Clr), sizeof(Clr)); + State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); + State->Dumpstream->write((const char *)CodeRecord.CodeAddr, + CodeRecord.CodeSize); +} + +static void +writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { + assert(State && "PerfState not initialized"); + dbgs() << "Writing unwind record with unwind data size " + << UnwindRecord.UnwindDataSize << " and EH frame header size " + << UnwindRecord.EHFrameHdrSize << " and mapped size " + << UnwindRecord.MappedSize << "\n"; + UWR Uwr{RecHeader{static_cast(UnwindRecord.Prefix.Id), + UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, + UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, + UnwindRecord.MappedSize}; + LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " + << UnwindRecord.EHFrameHdrSize + << " bytes of EH frame header, " + << UnwindRecord.UnwindDataSize 
- UnwindRecord.EHFrameHdrSize + << " bytes of EH frame\n"); + State->Dumpstream->write(reinterpret_cast(&Uwr), sizeof(Uwr)); + if (UnwindRecord.EHFrameHdrAddr) + State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, + UnwindRecord.EHFrameHdrSize); + else + State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), + UnwindRecord.EHFrameHdrSize); + State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, + UnwindRecord.UnwindDataSize - + UnwindRecord.EHFrameHdrSize); +} + +static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { + if (!State) + return make_error("PerfState not initialized", + inconvertibleErrorCode()); + + // Serialize the batch + std::lock_guard Lock(Mutex); + if (Batch.UnwindingRecord.Prefix.TotalSize > 0) + writeUnwindRecord(Batch.UnwindingRecord); + + for (const auto &DebugInfo : Batch.DebugInfoRecords) + writeDebugRecord(DebugInfo); + + for (const auto &CodeLoad : Batch.CodeLoadRecords) + writeCodeRecord(CodeLoad); + + State->Dumpstream->flush(); + + return Error::success(); +} + +struct Header { + uint32_t Magic; // characters "JiTD" + uint32_t Version; // header version + uint32_t TotalSize; // total size of header + uint32_t ElfMach; // elf mach target + uint32_t Pad1; // reserved + uint32_t Pid; + uint64_t Timestamp; // timestamp + uint64_t Flags; // flags +}; + +static Error OpenMarker(PerfState &State) { + // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap + // is captured either live (perf record running when we mmap) or in deferred + // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump + // file for more meta data info about the jitted code. Perf report/annotate + // detect this special filename and process the jitdump file. + // + // Mapping must be PROT_EXEC to ensure it is captured by perf record + // even when not using -d option. 
+ State.MarkerAddr = + ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, + MAP_PRIVATE, State.DumpFd, 0); + + if (State.MarkerAddr == MAP_FAILED) + return make_error("could not mmap JIT marker", + inconvertibleErrorCode()); + + return Error::success(); +} + +void CloseMarker(PerfState &State) { + if (!State.MarkerAddr) + return; + + munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate()); + State.MarkerAddr = nullptr; +} + +static Expected
FillMachine(PerfState &State) { + Header Hdr; + Hdr.Magic = LLVM_PERF_JIT_MAGIC; + Hdr.Version = LLVM_PERF_JIT_VERSION; + Hdr.TotalSize = sizeof(Hdr); + Hdr.Pid = State.Pid; + Hdr.Timestamp = perf_get_timestamp(); + + char Id[16]; + struct { + uint16_t e_type; + uint16_t e_machine; + } Info; + + size_t RequiredMemory = sizeof(Id) + sizeof(Info); + + ErrorOr> MB = + MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); + + // This'll not guarantee that enough data was actually read from the + // underlying file. Instead the trailing part of the buffer would be + // zeroed. Given the ELF signature check below that seems ok though, + // it's unlikely that the file ends just after that, and the + // consequence would just be that perf wouldn't recognize the + // signature. + if (!MB) + return make_error("could not open /proc/self/exe", + MB.getError()); + + memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id)); + memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info)); + + // check ELF signature + if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F') + return make_error("invalid ELF signature", + inconvertibleErrorCode()); + + Hdr.ElfMach = Info.e_machine; + + return Hdr; +} + +static Error InitDebuggingDir(PerfState &State) { + time_t Time; + struct tm LocalTime; + char TimeBuffer[sizeof("YYYYMMDD")]; + SmallString<64> Path; + + // search for location to dump data to + if (const char *BaseDir = getenv("JITDUMPDIR")) + Path.append(BaseDir); + else if (!sys::path::home_directory(Path)) + Path = "."; + + // create debug directory + Path += "/.debug/jit/"; + if (auto EC = sys::fs::create_directories(Path)) { + std::string ErrStr; + raw_string_ostream ErrStream(ErrStr); + ErrStream << "could not create jit cache directory " << Path << ": " + << EC.message() << "\n"; + return make_error(std::move(ErrStr), inconvertibleErrorCode()); + } + + // create unique directory for dump data related to this process + time(&Time); + localtime_r(&Time, 
&LocalTime); + strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); + Path += JIT_LANG "-jit-"; + Path += TimeBuffer; + + SmallString<128> UniqueDebugDir; + + using sys::fs::createUniqueDirectory; + if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { + std::string ErrStr; + raw_string_ostream ErrStream(ErrStr); + ErrStream << "could not create unique jit cache directory " + << UniqueDebugDir << ": " << EC.message() << "\n"; + return make_error(std::move(ErrStr), inconvertibleErrorCode()); + } + + State.JitPath = std::string(UniqueDebugDir.str()); + + return Error::success(); +} + +static Error registerJITLoaderPerfStartImpl() { + PerfState Tentative; + Tentative.Pid = sys::Process::getProcessId(); + // check if clock-source is supported + if (!perf_get_timestamp()) + return make_error("kernel does not support CLOCK_MONOTONIC", + inconvertibleErrorCode()); + + if (auto Err = InitDebuggingDir(Tentative)) + return Err; + + std::string Filename; + raw_string_ostream FilenameBuf(Filename); + FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump"; + + // Need to open ourselves, because we need to hand the FD to OpenMarker() and + // raw_fd_ostream doesn't expose the FD. 
+ using sys::fs::openFileForWrite; + if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd, + sys::fs::CD_CreateNew, sys::fs::OF_None)) { + std::string ErrStr; + raw_string_ostream ErrStream(ErrStr); + ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": " + << EC.message() << "\n"; + return make_error(std::move(ErrStr), inconvertibleErrorCode()); + } + + Tentative.Dumpstream = + std::make_unique(Tentative.DumpFd, true); + + auto Header = FillMachine(Tentative); + if (!Header) + return Header.takeError(); + + // signal this process emits JIT information + if (auto Err = OpenMarker(Tentative)) + return Err; + + Tentative.Dumpstream->write(reinterpret_cast(&Header.get()), + sizeof(*Header)); + + // Everything initialized, can do profiling now. + if (Tentative.Dumpstream->has_error()) + return make_error("could not write JIT dump header", + inconvertibleErrorCode()); + + State = std::move(Tentative); + return Error::success(); +} + +static Error registerJITLoaderPerfEndImpl() { + if (!State) + return make_error("PerfState not initialized", + inconvertibleErrorCode()); + + RecHeader Close; + Close.Id = static_cast(PerfJITRecordType::JIT_CODE_CLOSE); + Close.TotalSize = sizeof(Close); + Close.Timestamp = perf_get_timestamp(); + State->Dumpstream->write(reinterpret_cast(&Close), + sizeof(Close)); + if (State->MarkerAddr) + CloseMarker(*State); + + State.reset(); + return Error::success(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle( + Data, Size, registerJITLoaderPerfImpl) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle(Data, Size, + registerJITLoaderPerfStartImpl) + .release(); +} + +extern "C" 
llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction::handle(Data, Size, + registerJITLoaderPerfEndImpl) + .release(); +} + +#else + +using namespace llvm; +using namespace llvm::orc; + +static Error badOS() { + using namespace llvm; + return llvm::make_error( + "unsupported OS (perf support is only available on linux!)", + inconvertibleErrorCode()); +} + +static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { + using namespace shared; + return WrapperFunction::handle(Data, Size, + badOSBatch) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { + using namespace shared; + return WrapperFunction::handle(Data, Size, badOS).release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { + using namespace shared; + return WrapperFunction::handle(Data, Size, badOS).release(); +} + +#endif diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s @@ -0,0 +1,204 @@ +# REQUIRES: native && x86_64-linux + +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=x86_64-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/ELF_x86-64_perf.o %s +# RUN: JITDUMPDIR="%t" llvm-jitlink -perf-support \ +# RUN: %t/ELF_x86-64_perf.o +# RUN: test -f %t/.debug/jit/llvm-IR-jit-*/jit-*.dump + +# Test ELF perf support for code load records and unwind info + + .text + .file "example.c" + .section .text.source,"ax",@progbits + .globl source # -- Begin function source + .p2align 4, 0x90 + .type source,@function 
+source: # @source +.Lfunc_begin0: + .file 1 "/app" "example.c" + .loc 1 1 0 # example.c:1:0 + .cfi_startproc +# %bb.0: + .loc 1 2 5 prologue_end # example.c:2:5 + movl $1, %eax + retq +.Ltmp0: +.Lfunc_end0: + .size source, .Lfunc_end0-source + .cfi_endproc + # -- End function + .section .text.passthrough,"ax",@progbits + .globl passthrough # -- Begin function passthrough + .p2align 4, 0x90 + .type passthrough,@function +passthrough: # @passthrough +.Lfunc_begin1: + .loc 1 5 0 # example.c:5:0 + .cfi_startproc +# %bb.0: + .loc 1 6 5 prologue_end # example.c:6:5 + movl $1, %eax + retq +.Ltmp1: +.Lfunc_end1: + .size passthrough, .Lfunc_end1-passthrough + .cfi_endproc + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin2: + .loc 1 9 0 # example.c:9:0 + .cfi_startproc +# %bb.0: + .loc 1 10 5 prologue_end # example.c:10:5 + xorl %eax, %eax + retq +.Ltmp2: +.Lfunc_end2: + .size main, .Lfunc_end2-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # 
DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x72 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 12 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges0 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x19 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 117 # DW_AT_type + # DW_AT_external + .byte 2 # Abbrev [2] 0x43:0x19 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .long .Linfo_string5 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 117 # DW_AT_type + # DW_AT_external + .byte 2 # Abbrev [2] 0x5c:0x19 DW_TAG_subprogram + .quad .Lfunc_begin2 # DW_AT_low_pc + .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # 
DW_AT_GNU_all_call_sites + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 117 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x75:0x7 DW_TAG_base_type + .long .Linfo_string4 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad .Lfunc_begin2 + .quad .Lfunc_end2 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 4ba6a9c9f65bbc8bd06e3652cb20fd4dfc846137)" # string offset=0 +.Linfo_string1: + .asciz "/app/example.c" # string offset=105 +.Linfo_string2: + .asciz "/app" # string offset=120 +.Linfo_string3: + .asciz "source" # string offset=125 +.Linfo_string4: + .asciz "int" # string offset=132 +.Linfo_string5: + .asciz "passthrough" # string offset=136 +.Linfo_string6: + .asciz "main" # string offset=148 + .ident "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 4ba6a9c9f65bbc8bd06e3652cb20fd4dfc846137)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -28,8 +28,10 @@ #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" #include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" +#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h" #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" #include 
"llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -140,6 +142,11 @@ cl::desc("Enable debugger suppport (default = !-noexec)"), cl::init(true), cl::Hidden, cl::cat(JITLinkCategory)); +static cl::opt PerfSupport("perf-support", + cl::desc("Enable perf profiling support"), + cl::init(false), cl::Hidden, + cl::cat(JITLinkCategory)); + static cl::opt NoProcessSymbols("no-process-syms", cl::desc("Do not resolve to llvm-jitlink process symbols"), @@ -243,10 +250,14 @@ static ExitOnError ExitOnErr; static LLVM_ATTRIBUTE_USED void linkComponents() { - errs() << (void *)&llvm_orc_registerEHFrameSectionWrapper - << (void *)&llvm_orc_deregisterEHFrameSectionWrapper - << (void *)&llvm_orc_registerJITLoaderGDBWrapper - << (void *)&llvm_orc_registerJITLoaderGDBAllocAction; + errs() << "Linking in runtime functions\n" + << (void *)&llvm_orc_registerEHFrameSectionWrapper << '\n' + << (void *)&llvm_orc_deregisterEHFrameSectionWrapper << '\n' + << (void *)&llvm_orc_registerJITLoaderGDBWrapper << '\n' + << (void *)&llvm_orc_registerJITLoaderGDBAllocAction << '\n' + << (void *)&llvm_orc_registerJITLoaderPerfStart << '\n' + << (void *)&llvm_orc_registerJITLoaderPerfEnd << '\n' + << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n'; } static bool UseTestResultOverride = false; @@ -979,6 +990,10 @@ ObjLayer.addPlugin(ExitOnErr( GDBJITDebugInfoRegistrationPlugin::Create(this->ES, *MainJD, TT))); + if (PerfSupport && TT.isOSBinFormatELF()) + ObjLayer.addPlugin(ExitOnErr(PerfSupportPlugin::Create( + this->ES.getExecutorProcessControl(), *MainJD, true))); + // Set up the platform. if (TT.isOSBinFormatMachO() && !OrcRuntime.empty()) { if (auto P =