diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake --- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake +++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -240,3 +240,60 @@ endif() set(OPENMP_PTHREAD_LIB ${LLVM_PTHREAD_LIB}) + +################################################################################ +# Looking for Level0 +################################################################################ + +message(STATUS "Looking for Level0 includes.") + +find_path(LIBOMPTARGET_DEP_LEVEL0_INCLUDE_DIRS + NAMES + ze_api.h + PATHS + ENV LIBOMPTARGET_LEVEL0_ROOT + ENV CPATH + PATH_SUFFIXES + level_zero + include/level_zero) + +if(NOT LIBOMPTARGET_DEP_LEVEL0_INCLUDE_DIRS) + set(LIBOMPTARGET_DEP_LEVEL0_FOUND FALSE) + message(STATUS "Could NOT find Level0. Missing includes.") +else() + message(STATUS "Level0 include DIR: ${LIBOMPTARGET_DEP_LEVEL0_INCLUDE_DIRS}") + message(STATUS "Looking for Level0 library.") + + # Search L0 library + if(WIN32) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(LEVEL0_LIBRARY_NAME level_zero64) + else() + set(LEVEL0_LIBRARY_NAME level_zero32) + endif() + else() + set(LEVEL0_LIBRARY_NAME level_zero ze_loader) + endif() + find_library(LIBOMPTARGET_DEP_LEVEL0_LIBRARIES + NAMES + ${LEVEL0_LIBRARY_NAME} + PATHS + ENV LIBOMPTARGET_LEVEL0_ROOT + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH + PATH_SUFFIXES + lib/ubuntu_18.04 lib) # TODO: follow up with path changes + + if(NOT LIBOMPTARGET_DEP_LEVEL0_LIBRARIES) + set(LIBOMPTARGET_DEP_LEVEL0_FOUND FALSE) + message(STATUS "Could NOT find Level0. Missing library.") + else() + message(STATUS "Level0 library: ${LIBOMPTARGET_DEP_LEVEL0_LIBRARIES}") + set(LIBOMPTARGET_DEP_LEVEL0_FOUND TRUE) + endif() +endif() + +mark_as_advanced( + LIBOMPTARGET_DEP_LEVEL0_FOUND + LIBOMPTARGET_DEP_LEVEL0_INCLUDE_DIRS + LIBOMPTARGET_DEP_LEVEL0_LIBRARIES) diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -85,6 +85,7 @@ add_subdirectory(ve) add_subdirectory(x86_64) add_subdirectory(remote) +add_subdirectory(level0) # Make sure the parent scope can see the plugins that will be created. set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE) diff --git a/openmp/libomptarget/plugins/common/elf_common/CMakeLists.txt b/openmp/libomptarget/plugins/common/elf_common/CMakeLists.txt --- a/openmp/libomptarget/plugins/common/elf_common/CMakeLists.txt +++ b/openmp/libomptarget/plugins/common/elf_common/CMakeLists.txt @@ -10,7 +10,7 @@ # ##===----------------------------------------------------------------------===## -add_library(elf_common OBJECT elf_common.cpp ELFSymbols.cpp) +add_library(elf_common OBJECT elf_common.cpp ELFSymbols.cpp elf_light.cpp) # Build elf_common with PIC to be able to link it with plugin shared libraries. set_property(TARGET elf_common PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_light.h b/openmp/libomptarget/plugins/common/elf_common/elf_light.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/common/elf_common/elf_light.h @@ -0,0 +1,193 @@ +//===-- elf_light.h - Basic ELF functionality -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Light ELF implementation provides basic ELF reading functionality.
+// It may be used in systems without libelf support, if the corresponding
+// LLVM ELF implementation is available.
+// The interface declared here must be independent of libelf.h/elf.h.
+//
+// NOTE: we can try to rely on https://github.com/WolfgangSt/libelf
+// on Windows, if this implementation gets more complex.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_LIGHT_H
+#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_LIGHT_H
+
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+
+class ElfL;
+class ElfLSegmentNoteIterator;
+class ElfLSectionNoteIterator;
+class ElfLNote;
+
+// Class representing NOTEs from PT_NOTE segments and SHT_NOTE sections.
+class ElfLNote {
+  const void *Impl = nullptr;
+
+  friend class ElfLSegmentNoteIterator;
+  friend class ElfLSectionNoteIterator;
+
+  // Only the note iterators are allowed to create notes via their
+  // operator*().
+  explicit ElfLNote(const void *I);
+  ElfLNote &operator=(const ElfLNote &) = delete;
+
+public:
+  // FIXME: add move constructor and move assignment operator.
+  ElfLNote(const ElfLNote &);
+  ~ElfLNote();
+  // Returns the note's name size not including the null terminator.
+  // Note that it may be illegal to access the getName() pointer
+  // beyond the returned size, i.e. the implementation may
+  // not guarantee that there is '\0' after getNameSize()
+  // characters of the name.
+  uint64_t getNameSize() const;
+  // Returns a pointer to the beginning of the note's name.
+  const char *getName() const;
+  // Returns the number of bytes in the descriptor.
+  uint64_t getDescSize() const;
+  // Returns a pointer to the beginning of the note's descriptor.
+  // It is illegal to access more than getDescSize() bytes
+  // via this pointer.
+  const uint8_t *getDesc() const;
+  uint64_t getType() const;
+};
+
+// Iterator over NOTEs in PT_NOTE segments.
+class ElfLSegmentNoteIterator {
+
+  void *Impl = nullptr;
+
+  friend class ElfL;
+
+  // Only ElfL is allowed to create iterators to itself.
+  ElfLSegmentNoteIterator(const void *I, bool IsEnd = false);
+  ElfLSegmentNoteIterator &operator=(const ElfLSegmentNoteIterator &) = delete;
+
+public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = ElfLNote;
+  using difference_type = std::ptrdiff_t;
+  using pointer = ElfLNote *;
+  using reference = ElfLNote &;
+  // FIXME: add move constructor and move assignment operator.
+  ElfLSegmentNoteIterator(const ElfLSegmentNoteIterator &Other);
+  ~ElfLSegmentNoteIterator();
+  ElfLSegmentNoteIterator &operator++();
+  bool operator==(const ElfLSegmentNoteIterator Other) const;
+  bool operator!=(const ElfLSegmentNoteIterator Other) const;
+  ElfLNote operator*() const;
+};
+
+// Iterator over NOTEs in SHT_NOTE sections.
+class ElfLSectionNoteIterator {
+
+  void *Impl = nullptr;
+
+  friend class ElfL;
+
+  // Only ElfL is allowed to create iterators to itself.
+  ElfLSectionNoteIterator(const void *I, bool IsEnd = false);
+  ElfLSectionNoteIterator &operator=(const ElfLSectionNoteIterator &) = delete;
+
+public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = ElfLNote;
+  using difference_type = std::ptrdiff_t;
+  using pointer = ElfLNote *;
+  using reference = ElfLNote &;
+  // FIXME: add move constructor and move assignment operator.
+  ElfLSectionNoteIterator(const ElfLSectionNoteIterator &Other);
+  ~ElfLSectionNoteIterator();
+  ElfLSectionNoteIterator &operator++();
+  bool operator==(const ElfLSectionNoteIterator Other) const;
+  bool operator!=(const ElfLSectionNoteIterator Other) const;
+  ElfLNote operator*() const;
+};
+
+// Class representing ELF section.
+class ElfLSection {
+  const void *Impl = nullptr;
+
+  friend class ElfLSectionIterator;
+
+  // Only ElfLSectionIterator is allowed to create sections via its
+  // operator*().
+  explicit ElfLSection(const void *I);
+  ElfLSection &operator=(const ElfLSection &) = delete;
+
+public:
+  // FIXME: add move constructor and move assignment operator.
+  ElfLSection(const ElfLSection &);
+  ~ElfLSection();
+
+  // Returns the section name, which is a null-terminated string.
+  const char *getName() const;
+  // Returns the section size.
+  uint64_t getSize() const;
+  // Returns a pointer to the beginning of the section.
+  const uint8_t *getContents() const;
+};
+
+// Iterator over sections.
+class ElfLSectionIterator {
+
+  void *Impl = nullptr;
+
+  friend class ElfL;
+
+  // Only ElfL is allowed to create iterators to itself.
+  ElfLSectionIterator(const void *I, bool IsEnd = false);
+  ElfLSectionIterator &operator=(const ElfLSectionIterator &) = delete;
+
+public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = ElfLSection;
+  using difference_type = std::ptrdiff_t;
+  using pointer = ElfLSection *;
+  using reference = ElfLSection &;
+  // FIXME: add move constructor and move assignment operator.
+  ElfLSectionIterator(const ElfLSectionIterator &Other);
+  ~ElfLSectionIterator();
+  ElfLSectionIterator &operator++();
+  bool operator==(const ElfLSectionIterator Other) const;
+  bool operator!=(const ElfLSectionIterator Other) const;
+  ElfLSection operator*() const;
+};
+
+// Wrapper around the given ELF image.
+class ElfL {
+  // Opaque pointer to the actual implementation.
+  void *Impl = nullptr;
+
+  // FIXME: implement if needed.
+  ElfL(const ElfL &) = delete;
+  ElfL &operator=(const ElfL &) = delete;
+
+public:
+  ElfL(char *Begin, size_t Size);
+  ~ElfL();
+  bool isValidElf() const;
+  const char *getErrmsg(int N) const;
+  uint16_t getEMachine() const;
+  uint16_t getEType() const;
+
+  static bool isDynType(uint16_t Ty);
+
+  ElfLSectionNoteIterator section_notes_begin() const;
+  ElfLSectionNoteIterator section_notes_end() const;
+  ElfLSegmentNoteIterator segment_notes_begin() const;
+  ElfLSegmentNoteIterator segment_notes_end() const;
+  ElfLSectionIterator sections_begin() const;
+  ElfLSectionIterator sections_end() const;
+};
+
+#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_LIGHT_H
diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_light.cpp b/openmp/libomptarget/plugins/common/elf_common/elf_light.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins/common/elf_common/elf_light.cpp
@@ -0,0 +1,1682 @@
+//===-- elf_light.cpp - Basic ELF functionality -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "elf_light.h"
+#include "Debug.h"
+#include <cassert>
+#include <limits>
+
+#ifndef TARGET_NAME
+#define TARGET_NAME ELF light
+#endif
+#define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME)
+
+#if MAY_USE_LIBELF
+
+// Implementation based on libelf.
+#include <gelf.h>
+#include <libelf.h>
+
+// Helper methods to align addresses.
+template <class T> inline T alignDown(T value, size_t alignment) {
+  return (T)(value & ~(alignment - 1));
+}
+
+template <class T> inline T *alignDown(T *value, size_t alignment) {
+  return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment));
+}
+
+template <class T> inline T alignUp(T value, size_t alignment) {
+  return alignDown((T)(value + alignment - 1), alignment);
+}
+
+template <class T> inline T *alignUp(T *value, size_t alignment) {
+  return reinterpret_cast<T *>(
+      alignDown((intptr_t)value + alignment - 1, alignment));
+}
+
+// FIXME: this is taken from openmp/libomptarget/plugins/amdgpu/impl/system.cpp,
+// but it may be incorrect for 64-bit ELF. Elf64_Nhdr and Elf32_Nhdr
+// have different representation. The alignment used for the name and
+// the descriptor is still 4 bytes. At the same time, it seems to work
+// for 64-bit ELFs produced by LLVM.
+struct Elf_Note {
+  uint32_t n_namesz; // Length of note's name.
+  uint32_t n_descsz; // Length of note's value.
+  uint32_t n_type;   // Type of note.
+  // then name
+  // then padding, optional
+  // then desc, at 4 byte alignment (not 8, despite being elf64)
+};
+
+static const uint32_t NoteAlignment = 4;
+
+// Implementation of the iterator for SHT_NOTE sections.
+// The iterator allows processing all NOTEs in all SHT_NOTE sections
+// in the ELF image provided during the iterator object construction.
+class ElfLSectionNoteIteratorImpl {
+  // A pointer to Elf object created by elf_memory() for
+  // the ELF image we are going to iterate.
+  Elf *EF;
+
+  // A pointer to the current SHT_NOTE section.
+  // In the initial state of the iterator object this will actually
+  // point to the very first section in the ELF image, but it will be
+  // adjusted right away either to point to the first SHT_NOTE section
+  // or set to nullptr (if there are no SHT_NOTE sections).
+  Elf_Scn *CurrentSection = nullptr;
+
+  // A pointer to the current NOTE inside a SHT_NOTE section
+  // pointed by CurrentSection. If it is nullptr, then this means
+  // that the iterator object is an end() iterator.
+  Elf_Note *NPtr = nullptr;
+
+  uint64_t getNotesBeginAddr(const GElf_Shdr &Shdr) const {
+    return reinterpret_cast<uint64_t>(elf_rawfile(EF, nullptr)) +
+           Shdr.sh_offset;
+  }
+
+  uint64_t getNotesEndAddr(const GElf_Shdr &Shdr) const {
+    return getNotesBeginAddr(Shdr) + Shdr.sh_size;
+  }
+
+  uint64_t getNoteSize(const Elf_Note &Note) const {
+    return sizeof(Note) + alignUp(Note.n_namesz, NoteAlignment) +
+           alignUp(Note.n_descsz, NoteAlignment);
+  }
+
+  // Given the current state of the iterator object, advances
+  // the iterator forward to point to the next NOTE in the next
+  // SHT_NOTE section.
+  // If there is no such NOTE, then it sets the iterator
+  // object to the end() state.
+  //
+  // Note that this method does not change the iterator, if
+  // NPtr is pointing to a valid note within CurrentSection.
+  // The iterator advancement in this case is done via operator++.
+  void autoAdvance(bool IsFirst = false) {
+    // Cannot advance, if CurrentSection is NULL.
+    if (!CurrentSection)
+      return;
+
+    // NPtr points to a valid NOTE in CurrentSection, thus,
+    // no auto advancement.
+    if (NPtr)
+      return;
+
+    GElf_Shdr Shdr;
+    gelf_getshdr(CurrentSection, &Shdr);
+
+    // CurrentSection is a valid section, and NPtr is an end() iterator.
+    //
+    // If IsFirst is true, then we are just in the initial state, and
+    // we need to set CurrentSection to the first SHT_NOTE section (if any),
+    // and, then, NPtr to the first note in this section.
+    //
+    // If IsFirst is false, then we've reached the end of the current
+    // SHT_NOTE section, and should find the next section with notes.
+    if (!IsFirst || gelf_getshdr(CurrentSection, &Shdr)->sh_type != SHT_NOTE)
+      CurrentSection = elf_nextscn(EF, CurrentSection);
+
+    while (CurrentSection &&
+           gelf_getshdr(CurrentSection, &Shdr)->sh_type != SHT_NOTE)
+      CurrentSection = elf_nextscn(EF, CurrentSection);
+
+    if (!CurrentSection) {
+      // No more sections.
+      // Note that NPtr is already nullptr indicating the end() iterator.
+      return;
+    }
+
+    gelf_getshdr(CurrentSection, &Shdr);
+    uint64_t NotesBegin = getNotesBeginAddr(Shdr);
+    uint64_t NotesEnd = getNotesEndAddr(Shdr);
+    if (NotesBegin >= NotesEnd) {
+      // Something went wrong. Assume that we've reached
+      // the end of all notes.
+      CurrentSection = nullptr;
+      NPtr = nullptr;
+      return;
+    }
+
+    NPtr = reinterpret_cast<Elf_Note *>(NotesBegin);
+    assert(NPtr && "Invalid SHT_NOTE section.");
+  }
+
+  bool operator!=(const ElfLSectionNoteIteratorImpl Other) const {
+    return !(*this == Other);
+  }
+
+public:
+  ElfLSectionNoteIteratorImpl(Elf *RawElf, bool IsEnd = false) : EF(RawElf) {
+    assert(EF && "Trying to iterate invalid ELF.");
+
+    if (IsEnd) {
+      // NPtr equal to nullptr means end() iterator.
+      return;
+    }
+
+    // Set CurrentSection to the very first section,
+    // and let autoAdvance() find the first valid note (if any).
+    CurrentSection = elf_getscn(EF, 0);
+    autoAdvance(true);
+  }
+
+  bool operator==(const ElfLSectionNoteIteratorImpl Other) const {
+    // They should be pointing to the same NOTE to be equal.
+    return NPtr == Other.NPtr;
+  }
+
+  const Elf_Note *operator*() const {
+    assert(*this != ElfLSectionNoteIteratorImpl(EF, true) &&
+           "Dereferencing the end iterator.");
+    return NPtr;
+  }
+
+  // Advance to the next NOTE in the CurrentSection.
+  // If there is no next NOTE, then autoAdvance() to the next
+  // SHT_NOTE section and its first NOTE.
+  ElfLSectionNoteIteratorImpl &operator++() {
+    assert(*this != ElfLSectionNoteIteratorImpl(EF, true) &&
+           "Incrementing the end iterator.");
+
+    GElf_Shdr Shdr;
+    gelf_getshdr(CurrentSection, &Shdr);
+    uint64_t NotesBegin = getNotesBeginAddr(Shdr);
+    uint64_t NotesEnd = getNotesEndAddr(Shdr);
+    assert(reinterpret_cast<uint64_t>(NPtr) >= NotesBegin &&
+           reinterpret_cast<uint64_t>(NPtr) < NotesEnd &&
+           "Invalid pointer to a note computed somewhere else.");
+    (void)NotesBegin;
+
+    uint64_t NoteSize = getNoteSize(*NPtr);
+    NPtr = reinterpret_cast<Elf_Note *>(reinterpret_cast<uint64_t>(NPtr) +
+                                        NoteSize);
+    if (reinterpret_cast<uint64_t>(NPtr) >= NotesEnd ||
+        reinterpret_cast<uint64_t>(NPtr) + sizeof(*NPtr) >= NotesEnd) {
+      // We've reached the end of the current NOTE section.
+      NPtr = nullptr;
+    }
+
+    // Auto advance to the next section, if needed.
+    autoAdvance();
+    return *this;
+  }
+};
+
+// Implementation of the iterator for PT_NOTE segments.
+// The iterator allows processing all NOTEs in all PT_NOTE segments
+// in the ELF image provided during the iterator object construction.
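
A worked example of the note-size arithmetic used by getNoteSize() above (illustrative values only, not part of the patch):

// Illustrative only: size of one note record under the Elf_Note layout
// above, for n_namesz == 7 and n_descsz == 10:
//   sizeof(Elf_Note) + alignUp(7, NoteAlignment) + alignUp(10, NoteAlignment)
//     = 12 + 8 + 12 = 32 bytes,
// i.e. the 12-byte header, the name padded to 8 bytes, and the descriptor
// padded to 12 bytes. The note iterators step through SHT_NOTE/PT_NOTE
// contents in exactly these increments.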
+class ElfLSegmentNoteIteratorImpl {
+  // A pointer to Elf object created by elf_memory() for
+  // the ELF image we are going to iterate.
+  Elf *EF;
+
+  // The total number of program headers, and the index of the current
+  // PT_NOTE segment. In the initial state of the iterator object
+  // CurrentSegment will refer to the very first segment in the ELF image,
+  // but it will be adjusted right away either to refer to the first
+  // PT_NOTE segment or set to NumberOfPhdrs (if there are no PT_NOTE
+  // segments).
+  size_t NumberOfPhdrs = (std::numeric_limits<size_t>::max)();
+  size_t CurrentSegment = (std::numeric_limits<size_t>::max)();
+
+  // A pointer to the current NOTE inside a PT_NOTE segment
+  // referred to by CurrentSegment. If it is nullptr, then this means
+  // that the iterator object is an end() iterator.
+  Elf_Note *NPtr = nullptr;
+
+  uint64_t getNotesBeginAddr(const GElf_Phdr &Phdr) const {
+    return reinterpret_cast<uint64_t>(elf_rawfile(EF, nullptr)) +
+           Phdr.p_offset;
+  }
+
+  uint64_t getNotesEndAddr(const GElf_Phdr &Phdr) const {
+    return getNotesBeginAddr(Phdr) + Phdr.p_filesz;
+  }
+
+  uint64_t getNoteSize(const Elf_Note &Note) const {
+    return sizeof(Note) + alignUp(Note.n_namesz, NoteAlignment) +
+           alignUp(Note.n_descsz, NoteAlignment);
+  }
+
+  // Given the current state of the iterator object, advances
+  // the iterator forward to point to the next NOTE in the next
+  // PT_NOTE segment.
+  // If there is no such NOTE, then it sets the iterator
+  // object to the end() state.
+  //
+  // Note that this method does not change the iterator, if
+  // NPtr is pointing to a valid note within CurrentSegment.
+  // The iterator advancement in this case is done via operator++.
+  void autoAdvance(bool IsFirst = false) {
+    // Cannot advance, if CurrentSegment is invalid.
+    if (CurrentSegment >= NumberOfPhdrs)
+      return;
+
+    // NPtr points to a valid NOTE in CurrentSegment, thus,
+    // no auto advancement.
+    if (NPtr)
+      return;
+
+    GElf_Phdr Phdr;
+    gelf_getphdr(EF, CurrentSegment, &Phdr);
+
+    // CurrentSegment is a valid segment, and NPtr is an end() iterator.
+    //
+    // If IsFirst is true, then we are just in the initial state, and
+    // we need to set CurrentSegment to the first PT_NOTE segment (if any),
+    // and, then, NPtr to the first note in this segment.
+    //
+    // If IsFirst is false, then we've reached the end of the current
+    // PT_NOTE segment, and should find the next segment with notes.
+    if (!IsFirst || Phdr.p_type != PT_NOTE)
+      ++CurrentSegment;
+
+    while (CurrentSegment < NumberOfPhdrs) {
+      if (gelf_getphdr(EF, CurrentSegment, &Phdr) != &Phdr) {
+        // Skip program headers we cannot read; advance first to
+        // avoid looping forever on a bad header.
+        ++CurrentSegment;
+        continue;
+      }
+
+      if (Phdr.p_type == PT_NOTE)
+        break;
+
+      ++CurrentSegment;
+    }
+
+    if (CurrentSegment >= NumberOfPhdrs) {
+      // No more segments.
+      // Note that NPtr is already nullptr indicating the end() iterator.
+      return;
+    }
+
+    if (gelf_getphdr(EF, CurrentSegment, &Phdr) != &Phdr)
+      assert(false && "Invalid program header selected above.");
+
+    uint64_t NotesBegin = getNotesBeginAddr(Phdr);
+    uint64_t NotesEnd = getNotesEndAddr(Phdr);
+    if (NotesBegin >= NotesEnd) {
+      // Something went wrong. Assume that we've reached
+      // the end of all notes.
+      CurrentSegment = NumberOfPhdrs;
+      NPtr = nullptr;
+      return;
+    }
+
+    NPtr = reinterpret_cast<Elf_Note *>(NotesBegin);
+    assert(NPtr && "Invalid PT_NOTE segment.");
+  }
+
+  bool operator!=(const ElfLSegmentNoteIteratorImpl Other) const {
+    return !(*this == Other);
+  }
+
+public:
+  ElfLSegmentNoteIteratorImpl(Elf *RawElf, bool IsEnd = false) : EF(RawElf) {
+    assert(EF && "Trying to iterate invalid ELF.");
+
+    if (IsEnd) {
+      // NPtr equal to nullptr means end() iterator.
+      return;
+    }
+
+    // Set CurrentSegment to the very first segment,
+    // and let autoAdvance() find the first valid note (if any).
+    CurrentSegment = 0;
+
+    // Set NumberOfPhdrs to 0, if we cannot query it.
+    if (elf_getphdrnum(EF, &NumberOfPhdrs) != 0)
+      NumberOfPhdrs = 0;
+    autoAdvance(true);
+  }
+
+  bool operator==(const ElfLSegmentNoteIteratorImpl Other) const {
+    // They should be pointing to the same NOTE to be equal.
+    return NPtr == Other.NPtr;
+  }
+
+  const Elf_Note *operator*() const {
+    assert(*this != ElfLSegmentNoteIteratorImpl(EF, true) &&
+           "Dereferencing the end iterator.");
+    return NPtr;
+  }
+
+  // Advance to the next NOTE in the CurrentSegment.
+  // If there is no next NOTE, then autoAdvance() to the next
+  // PT_NOTE segment and its first NOTE.
+  ElfLSegmentNoteIteratorImpl &operator++() {
+    assert(*this != ElfLSegmentNoteIteratorImpl(EF, true) &&
+           "Incrementing the end iterator.");
+
+    GElf_Phdr Phdr;
+    gelf_getphdr(EF, CurrentSegment, &Phdr);
+    uint64_t NotesBegin = getNotesBeginAddr(Phdr);
+    uint64_t NotesEnd = getNotesEndAddr(Phdr);
+    assert(reinterpret_cast<uint64_t>(NPtr) >= NotesBegin &&
+           reinterpret_cast<uint64_t>(NPtr) < NotesEnd &&
+           "Invalid pointer to a note computed somewhere else.");
+    (void)NotesBegin;
+
+    uint64_t NoteSize = getNoteSize(*NPtr);
+    NPtr = reinterpret_cast<Elf_Note *>(reinterpret_cast<uint64_t>(NPtr) +
+                                        NoteSize);
+    if (reinterpret_cast<uint64_t>(NPtr) >= NotesEnd ||
+        reinterpret_cast<uint64_t>(NPtr) + sizeof(*NPtr) >= NotesEnd) {
+      // We've reached the end of the current NOTE segment.
+      NPtr = nullptr;
+    }
+
+    // Auto advance to the next segment, if needed.
+    autoAdvance();
+    return *this;
+  }
+};
+
+class ElfLSectionImpl {
+  Elf *EF = nullptr;
+  Elf_Scn *Section = nullptr;
+
+public:
+  ElfLSectionImpl(Elf *EF, Elf_Scn *Section) : EF(EF), Section(Section) {}
+
+  const char *getName() const {
+    size_t SHStrNdx;
+    if (elf_getshdrstrndx(EF, &SHStrNdx) != 0)
+      return "";
+
+    GElf_Shdr Shdr;
+    gelf_getshdr(Section, &Shdr);
+    char *Name = elf_strptr(EF, SHStrNdx, static_cast<size_t>(Shdr.sh_name));
+    return Name ? Name : "";
+  }
+
+  uint64_t getSize() const {
+    Elf_Data *Desc = elf_rawdata(Section, nullptr);
+    if (!Desc)
+      return 0;
+
+    return Desc->d_size;
+  }
+
+  const uint8_t *getContents() const {
+    Elf_Data *Desc = elf_rawdata(Section, nullptr);
+    if (!Desc)
+      return nullptr;
+
+    return reinterpret_cast<const uint8_t *>(Desc->d_buf);
+  }
+};
+
+class ElfLSectionIteratorImpl {
+  // A pointer to Elf object created by elf_memory() for
+  // the ELF image we are going to iterate.
+  Elf *EF = nullptr;
+
+  // A pointer to the current section.
+  Elf_Scn *CurrentSection = nullptr;
+
+  bool operator!=(const ElfLSectionIteratorImpl Other) const {
+    return !(*this == Other);
+  }
+
+public:
+  ElfLSectionIteratorImpl(Elf *RawElf, bool IsEnd = false) : EF(RawElf) {
+    assert(EF && "Trying to iterate invalid ELF.");
+    if (IsEnd)
+      return;
+
+    CurrentSection = elf_getscn(EF, 0);
+  }
+
+  bool operator==(const ElfLSectionIteratorImpl Other) const {
+    return CurrentSection == Other.CurrentSection;
+  }
+
+  ElfLSectionImpl *operator*() const {
+    assert(*this != ElfLSectionIteratorImpl(EF, true) &&
+           "Dereferencing the end iterator.");
+    return new ElfLSectionImpl(EF, CurrentSection);
+  }
+
+  ElfLSectionIteratorImpl &operator++() {
+    assert(*this != ElfLSectionIteratorImpl(EF, true) &&
+           "Incrementing the end iterator.");
+
+    CurrentSection = elf_nextscn(EF, CurrentSection);
+    return *this;
+  }
+};
+
+// Actual implementation of ElfL via libelf.
+// It is constructed from an ELF image defined by its
+// starting pointer in memory and a length in bytes.
+class ElfLImpl {
+  // A pointer to Elf object created by elf_memory() for
+  // the ELF image.
+  Elf *EF = nullptr;
+
+  // Class of the ELF image.
+  unsigned ElfClass = ELFCLASSNONE;
+
+  // A pointer to the ELF image's header.
+  // Depending on the class it may be either 'Elf32_Ehdr *'
+  // or 'Elf64_Ehdr *'.
+  const void *Header = nullptr;
+
+  // Let the owning object access this.
+  friend class ElfL;
+
+public:
+  ElfLImpl(Elf *RawElf, unsigned ElfClass, const void *Header)
+      : EF(RawElf), ElfClass(ElfClass), Header(Header) {}
+
+  // Allocates and constructs a new iterator for NOTEs in
+  // SHT_NOTE sections of the ELF image.
+  ElfLSectionNoteIteratorImpl *
+  createSectionNoteIteratorImpl(bool IsEnd) const {
+    return new ElfLSectionNoteIteratorImpl(EF, IsEnd);
+  }
+
+  // Allocates and constructs a new iterator for NOTEs in
+  // PT_NOTE segments of the ELF image.
+  ElfLSegmentNoteIteratorImpl *
+  createSegmentNoteIteratorImpl(bool IsEnd) const {
+    return new ElfLSegmentNoteIteratorImpl(EF, IsEnd);
+  }
+
+  ElfLSectionIteratorImpl *
+  createSectionIteratorImpl(bool IsEnd) const {
+    return new ElfLSectionIteratorImpl(EF, IsEnd);
+  }
+};
+
+ElfL::ElfL(char *Begin, size_t Size) {
+  Elf *ElfHandle = elf_memory(Begin, Size);
+  if (!ElfHandle) {
+    elf_end(ElfHandle);
+    return;
+  }
+
+  const Elf32_Ehdr *Header32 = elf32_getehdr(ElfHandle);
+  const Elf64_Ehdr *Header64 = elf64_getehdr(ElfHandle);
+
+  if (!Header32 == !Header64) {
+    // Ambiguous ELF header or unrecognized ELF image.
+    elf_end(ElfHandle);
+    return;
+  }
+
+  const void *Header = nullptr;
+  unsigned ElfClass = ELFCLASSNONE;
+
+  if (Header32) {
+    ElfClass = ELFCLASS32;
+    Header = reinterpret_cast<const void *>(Header32);
+  } else {
+    ElfClass = ELFCLASS64;
+    Header = reinterpret_cast<const void *>(Header64);
+  }
+
+  Impl = reinterpret_cast<void *>(new ElfLImpl(ElfHandle, ElfClass, Header));
+}
+
+ElfL::~ElfL() {
+  if (Impl) {
+    ElfLImpl *EImpl = reinterpret_cast<ElfLImpl *>(Impl);
+    elf_end(EImpl->EF);
+    delete EImpl;
+  }
+}
+
+bool ElfL::isValidElf() const {
+  ElfLImpl *EImpl = reinterpret_cast<ElfLImpl *>(Impl);
+  return Impl && EImpl->Header && EImpl->ElfClass != ELFCLASSNONE;
+}
+
+const char *ElfL::getErrmsg(int N) const { return elf_errmsg(-1); }
+
+uint16_t ElfL::getEMachine() const {
+  assert(isValidElf() && "Invalid ELF.");
+  ElfLImpl *EImpl = reinterpret_cast<ElfLImpl *>(Impl);
+  if (EImpl->ElfClass == ELFCLASS32)
+    return reinterpret_cast<const Elf32_Ehdr *>(EImpl->Header)->e_machine;
+  else if (EImpl->ElfClass == ELFCLASS64)
+    return reinterpret_cast<const Elf64_Ehdr *>(EImpl->Header)->e_machine;
+  else
+    assert(false && "Unsupported ELF class.");
+
+  return EM_NONE;
+}
+
+uint16_t ElfL::getEType() const {
+  assert(isValidElf() && "Invalid ELF.");
+  ElfLImpl *EImpl = reinterpret_cast<ElfLImpl *>(Impl);
+  if (EImpl->ElfClass == ELFCLASS32)
+    return reinterpret_cast<const Elf32_Ehdr *>(EImpl->Header)->e_type;
+  else if (EImpl->ElfClass == ELFCLASS64)
+    return reinterpret_cast<const Elf64_Ehdr *>(EImpl->Header)->e_type;
+  else
+    assert(false && "Unsupported ELF class.");
+
+  return ET_NONE;
+}
+
+bool ElfL::isDynType(uint16_t Ty) { return Ty == ET_DYN; }
+
+ElfLSectionNoteIterator ElfL::section_notes_begin() const {
+  return ElfLSectionNoteIterator(reinterpret_cast<const void *>(Impl));
+}
+
+ElfLSectionNoteIterator ElfL::section_notes_end() const {
+  return ElfLSectionNoteIterator(reinterpret_cast<const void *>(Impl),
+                                 true);
+}
+
+ElfLSectionNoteIterator::ElfLSectionNoteIterator(const void *I, bool IsEnd) {
+  const ElfLImpl *EImpl = reinterpret_cast<const ElfLImpl *>(I);
+  Impl = EImpl->createSectionNoteIteratorImpl(IsEnd);
+}
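
To make the interface concrete, here is a minimal consumer sketch. It is illustrative only: dumpSectionNotes, ImageStart, and ImageSize are invented names, and no such function exists in the patch; it relies solely on the elf_light.h API declared above.

#include "elf_light.h"
#include <cstdio>

// Print every note found in the SHT_NOTE sections of an ELF image.
void dumpSectionNotes(char *ImageStart, size_t ImageSize) {
  ElfL E(ImageStart, ImageSize);
  if (!E.isValidElf())
    return;

  for (ElfLSectionNoteIterator I = E.section_notes_begin(),
                               End = E.section_notes_end();
       I != End; ++I) {
    ElfLNote Note = *I;
    // getName() is not guaranteed to be null-terminated, so print exactly
    // getNameSize() characters.
    std::printf("note: %.*s, type %llu, %llu descriptor bytes\n",
                (int)Note.getNameSize(), Note.getName(),
                (unsigned long long)Note.getType(),
                (unsigned long long)Note.getDescSize());
  }
}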
+ +ElfLSectionNoteIterator::ElfLSectionNoteIterator( + const ElfLSectionNoteIterator &Other) { + ElfLSectionNoteIteratorImpl *IImpl = + reinterpret_cast(Other.Impl); + Impl = new ElfLSectionNoteIteratorImpl(*IImpl); +} + +ElfLSectionNoteIterator::~ElfLSectionNoteIterator() { + assert(Impl && "Invalid ElfLSectionNoteIterator object."); + ElfLSectionNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + delete IImpl; +} + +bool ElfLSectionNoteIterator::operator==( + const ElfLSectionNoteIterator Other) const { + const ElfLSectionNoteIteratorImpl *Lhs = + reinterpret_cast(Impl); + const ElfLSectionNoteIteratorImpl *Rhs = + reinterpret_cast(Other.Impl); + return (*Lhs == *Rhs); +} + +bool ElfLSectionNoteIterator::operator!=( + const ElfLSectionNoteIterator Other) const { + return !(*this == Other); +} + +ElfLSectionNoteIterator &ElfLSectionNoteIterator::operator++() { + ElfLSectionNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + ++(*IImpl); + return *this; +} + +ElfLNote ElfLSectionNoteIterator::operator*() const { + ElfLSectionNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + return ElfLNote(**IImpl); +} + +ElfLSegmentNoteIterator ElfL::segment_notes_begin() const { + return ElfLSegmentNoteIterator(reinterpret_cast(Impl)); +} + +ElfLSegmentNoteIterator ElfL::segment_notes_end() const { + return ElfLSegmentNoteIterator(reinterpret_cast(Impl), + true); +} + +ElfLSegmentNoteIterator::ElfLSegmentNoteIterator(const void *I, bool IsEnd) { + const ElfLImpl *EImpl = reinterpret_cast(I); + Impl = EImpl->createSegmentNoteIteratorImpl(IsEnd); +} + +ElfLSegmentNoteIterator::ElfLSegmentNoteIterator( + const ElfLSegmentNoteIterator &Other) { + ElfLSegmentNoteIteratorImpl *IImpl = + reinterpret_cast(Other.Impl); + Impl = new ElfLSegmentNoteIteratorImpl(*IImpl); +} + +ElfLSegmentNoteIterator::~ElfLSegmentNoteIterator() { + assert(Impl && "Invalid ElfLSegmentNoteIterator object."); + ElfLSegmentNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + delete IImpl; +} + +bool ElfLSegmentNoteIterator::operator==( + const ElfLSegmentNoteIterator Other) const { + const ElfLSegmentNoteIteratorImpl *Lhs = + reinterpret_cast(Impl); + const ElfLSegmentNoteIteratorImpl *Rhs = + reinterpret_cast(Other.Impl); + return (*Lhs == *Rhs); +} + +bool ElfLSegmentNoteIterator::operator!=( + const ElfLSegmentNoteIterator Other) const { + return !(*this == Other); +} + +ElfLSegmentNoteIterator &ElfLSegmentNoteIterator::operator++() { + ElfLSegmentNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + ++(*IImpl); + return *this; +} + +ElfLNote ElfLSegmentNoteIterator::operator*() const { + ElfLSegmentNoteIteratorImpl *IImpl = + reinterpret_cast(Impl); + return ElfLNote(**IImpl); +} + +ElfLNote::ElfLNote(const void *I) { + // ElfLNote::Impl is a pointer to Elf_Note in this implementation. + // A pointer to Elf_Note is returned by + // ElfLSectionNoteIteratorImpl::operator*(). + Impl = I; +} + +ElfLNote::ElfLNote(const ElfLNote &Other) { Impl = Other.Impl; } + +ElfLNote::~ElfLNote() {} + +uint64_t ElfLNote::getNameSize() const { + const Elf_Note *Note = reinterpret_cast(Impl); + if (Note->n_namesz == 0) + return 0; + // libelf returns name size that accounts for the null terminator. + // ELF light interface returns the size ignoring it. 
+ return Note->n_namesz - 1; +} + +const char *ElfLNote::getName() const { + const Elf_Note *Note = reinterpret_cast(Impl); + return reinterpret_cast(Note) + sizeof(*Note); +} + +uint64_t ElfLNote::getDescSize() const { + const Elf_Note *Note = reinterpret_cast(Impl); + return Note->n_descsz; +} + +const uint8_t *ElfLNote::getDesc() const { + const Elf_Note *Note = reinterpret_cast(Impl); + return reinterpret_cast(Note) + sizeof(*Note) + + alignUp(getNameSize(), NoteAlignment); +} + +uint64_t ElfLNote::getType() const { + const Elf_Note *Note = reinterpret_cast(Impl); + return Note->n_type; +} + +ElfLSection::ElfLSection(const void *I) { + Impl = I; +} + +ElfLSection::ElfLSection(const ElfLSection &Other) { + const ElfLSectionImpl *SImpl = + reinterpret_cast(Other.Impl); + Impl = new ElfLSectionImpl(*SImpl); +} + +ElfLSection::~ElfLSection() { + const ElfLSectionImpl *SImpl = + reinterpret_cast(Impl); + delete SImpl; +} + +const char *ElfLSection::getName() const { + const ElfLSectionImpl *SImpl = + reinterpret_cast(Impl); + return SImpl->getName(); +} + +uint64_t ElfLSection::getSize() const { + const ElfLSectionImpl *SImpl = + reinterpret_cast(Impl); + return SImpl->getSize(); +} + +const uint8_t *ElfLSection::getContents() const { + const ElfLSectionImpl *SImpl = + reinterpret_cast(Impl); + return SImpl->getContents(); +} + +ElfLSectionIterator ElfL::sections_begin() const { + return ElfLSectionIterator(Impl); +} + +ElfLSectionIterator ElfL::sections_end() const { + return ElfLSectionIterator(Impl, true); +} + +ElfLSectionIterator::ElfLSectionIterator(const void *I, bool IsEnd) { + const ElfLImpl *EImpl = reinterpret_cast(I); + Impl = EImpl->createSectionIteratorImpl(IsEnd); +} + +ElfLSectionIterator::ElfLSectionIterator( + const ElfLSectionIterator &Other) { + ElfLSectionIteratorImpl *IImpl = + reinterpret_cast(Other.Impl); + Impl = new ElfLSectionIteratorImpl(*IImpl); +} + +ElfLSectionIterator::~ElfLSectionIterator() { + assert(Impl && "Invalid ElfLSectionIterator object."); + ElfLSectionIteratorImpl *IImpl = + reinterpret_cast(Impl); + delete IImpl; +} + +bool ElfLSectionIterator::operator==( + const ElfLSectionIterator Other) const { + const ElfLSectionIteratorImpl *Lhs = + reinterpret_cast(Impl); + const ElfLSectionIteratorImpl *Rhs = + reinterpret_cast(Other.Impl); + return (*Lhs == *Rhs); +} + +bool ElfLSectionIterator::operator!=( + const ElfLSectionIterator Other) const { + return !(*this == Other); +} + +ElfLSectionIterator &ElfLSectionIterator::operator++() { + ElfLSectionIteratorImpl *IImpl = + reinterpret_cast(Impl); + ++(*IImpl); + return *this; +} + +ElfLSection ElfLSectionIterator::operator*() const { + ElfLSectionIteratorImpl *IImpl = + reinterpret_cast(Impl); + return ElfLSection(**IImpl); +} +#else // !MAY_USE_LIBELF + +// Implementation based on LLVM ELF binary format. 
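
For reference, the implementation below wraps llvm::object::ELFFile's typed note iteration. A standalone sketch of that underlying pattern, specialized to a 64-bit little-endian image (walkSectionNotes is an invented name; error handling is reduced to consumeError for brevity):

#include "llvm/Object/ELF.h"
#include "llvm/Support/Error.h"

// Visit the notes of every SHT_NOTE section of a 64-bit LE ELF image.
static void walkSectionNotes(
    const llvm::object::ELFFile<llvm::object::ELF64LE> &EF) {
  auto Sections = EF.sections();
  if (!Sections) {
    llvm::consumeError(Sections.takeError());
    return;
  }
  for (const auto &Sec : *Sections) {
    if (Sec.sh_type != llvm::ELF::SHT_NOTE)
      continue;
    llvm::Error Err = llvm::Error::success();
    for (const auto &Note : EF.notes(Sec, Err))
      (void)Note.getType(); // inspect Note.getName()/Note.getDesc() here
    if (Err)
      llvm::consumeError(std::move(Err));
  }
}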
+#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; + +class ElfLNoteImplBase { +public: + virtual ~ElfLNoteImplBase() = default; + virtual ElfLNoteImplBase *clone() const = 0; + virtual size_t getNameSize() const = 0; + virtual const char *getName() const = 0; + virtual size_t getDescSize() const = 0; + virtual const uint8_t *getDesc() const = 0; + virtual uint32_t getType() const = 0; +}; + +template class ElfLNoteImpl : public ElfLNoteImplBase { + using Elf_Note = typename ELFT::Note; + const Elf_Note Note; + +public: + ElfLNoteImpl(const Elf_Note Note) : Note(Note) {} + ElfLNoteImpl(const ElfLNoteImpl &) = default; + ElfLNoteImplBase *clone() const override { return new ElfLNoteImpl(*this); } + ~ElfLNoteImpl() = default; + size_t getNameSize() const override { return Note.getName().size(); } + const char *getName() const override { return Note.getName().data(); } + size_t getDescSize() const override { return Note.getDesc().size(); } + const uint8_t *getDesc() const override { return Note.getDesc().data(); } + uint32_t getType() const override { return Note.getType(); } +}; + +class ElfLNoteIteratorImplBase { +protected: + const endianness TargetEndianness; + const bool Is64Bits; + const bool IsSectionIterator; + + ElfLNoteIteratorImplBase(endianness TargetEndianness, bool Is64Bits, + bool IsSectionIterator) + : TargetEndianness(TargetEndianness), Is64Bits(Is64Bits), + IsSectionIterator(IsSectionIterator) {} + +public: + ElfLNoteIteratorImplBase(const ElfLNoteIteratorImplBase &) = default; + virtual ~ElfLNoteIteratorImplBase() = default; + virtual ElfLNoteIteratorImplBase *clone() const = 0; + virtual ElfLNoteIteratorImplBase &operator++() = 0; + virtual bool operator==(const ElfLNoteIteratorImplBase &) const = 0; + virtual ElfLNoteImplBase *operator*() const = 0; + + endianness getEndianness() const { return TargetEndianness; } + + bool is64Bits() const { return Is64Bits; } + + bool isSectionIterator() const { return IsSectionIterator; } +}; + +template +class ElfLNoteIteratorImpl : public ElfLNoteIteratorImplBase { +protected: + using NoteIterator = typename ELFT::NoteIterator; + + const ELFFile &EF; + NoteIterator NotesIt; + Error &Err; + + explicit ElfLNoteIteratorImpl(const ELFFile &EF, Error &Err, + bool IsSectionIterator) + : ElfLNoteIteratorImplBase(ELFT::TargetEndianness, ELFT::Is64Bits, + IsSectionIterator), + EF(EF), NotesIt(EF.notes_end()), Err(Err) {} + +public: + ElfLNoteIteratorImpl(const ElfLNoteIteratorImpl &) = default; + virtual ~ElfLNoteIteratorImpl() = default; + + static bool classof(const ElfLNoteIteratorImplBase *B) { + return (B->getEndianness() == ELFT::TargetEndianness && + B->is64Bits() == ELFT::Is64Bits); + } +}; + +template +class ElfLSectionNoteIteratorImpl : public ElfLNoteIteratorImpl { + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Shdr_Range = typename ELFT::ShdrRange; + using NoteIterator = typename ElfLNoteIteratorImpl::NoteIterator; + using SectionsIteratorTy = typename Elf_Shdr_Range::iterator; + + SectionsIteratorTy SectionsIt; + + const ELFFile &getEF() const { return this->EF; } + const NoteIterator &getNotesIt() const { return this->NotesIt; } + Error &getErr() const { return this->Err; } + NoteIterator &getNotesIt() { return this->NotesIt; } + SectionsIteratorTy section_begin() const { + Expected Sections = 
getEF().sections(); + if (!Sections) + return SectionsIteratorTy(); + + return Sections->begin(); + } + + SectionsIteratorTy section_end() const { + Expected Sections = getEF().sections(); + if (!Sections) + return SectionsIteratorTy(); + + return Sections->end(); + } + + bool isEqual(const ElfLSectionNoteIteratorImpl &Lhs, + const ElfLSectionNoteIteratorImpl &Rhs) const { + // Check for end() iterators, first. + if (Lhs.SectionsIt == section_end() && Rhs.SectionsIt == section_end()) + return true; + + if (Lhs.SectionsIt != Rhs.SectionsIt) + return false; + + return Lhs.getNotesIt() == Rhs.getNotesIt(); + } + + void autoAdvance(bool IsFirst = false) { + if (SectionsIt == section_end()) + return; + + if (getNotesIt() != getEF().notes_end()) + return; + + // SectionsIt is not an end iterator, and NotesIt is an end() + // iterator. + // + // If IsFirst is true, then we just in the initial state, and + // we need to set SectionsIt to the first SHT_NOTE section (if any), + // and, then, NotesIt to the first note in this section. + // + // If IsFirst is false, then we've reached the end of the current + // SHT_NOTE section, and should find the next section with notes. + if (!IsFirst || SectionsIt->sh_type != ELF::SHT_NOTE) + ++SectionsIt; + + while (SectionsIt != section_end() && + SectionsIt->sh_type != ELF::SHT_NOTE) { + ++SectionsIt; + } + + if (SectionsIt == section_end()) { + // No more sections. + return; + } + + const Elf_Shdr &Section = *SectionsIt; + getNotesIt() = getEF().notes_begin(Section, getErr()); + + // Auto advance the iterator, if the NOTE section + // does not contain any notes (e.g. some error happened + // during the note parsing). + autoAdvance(); + } + + bool operator!=(const ElfLSectionNoteIteratorImpl &Other) const { + return !(*this == Other); + } + +public: + ElfLSectionNoteIteratorImpl(const ELFFile &EF, Error &Err, + bool IsEnd = false) + : ElfLNoteIteratorImpl(EF, Err, true) { + if (IsEnd) { + SectionsIt = section_end(); + // It is an end() iterator, if SectionsIt is an end() iterator. + return; + } + + SectionsIt = section_begin(); + autoAdvance(true); + } + + ElfLSectionNoteIteratorImpl(const ElfLSectionNoteIteratorImpl &Copy) = + default; + + ElfLNoteIteratorImplBase *clone() const override { + return new ElfLSectionNoteIteratorImpl(*this); + } + + bool operator==(const ElfLNoteIteratorImplBase &Other) const override { + if (const ElfLSectionNoteIteratorImpl *OPtr = + dyn_cast(&Other)) { + return isEqual(*this, *OPtr); + } + return false; + } + + ElfLSectionNoteIteratorImpl &operator++() override { + assert(*this != ElfLSectionNoteIteratorImpl(getEF(), getErr(), true) && + "Incrementing the end iterator."); + // Move the notes iterator within the current section. 
+    ++getNotesIt();
+    autoAdvance();
+
+    return *this;
+  }
+
+  ElfLNoteImplBase *operator*() const override {
+    assert(*this != ElfLSectionNoteIteratorImpl(getEF(), getErr(), true) &&
+           "Dereferencing the end iterator.");
+    return new ElfLNoteImpl<ELFT>(*getNotesIt());
+  }
+
+  static bool classof(const ElfLNoteIteratorImplBase *B) {
+    return (ElfLNoteIteratorImpl<ELFT>::classof(B) &&
+            B->isSectionIterator() == true);
+  }
+};
+
+template <class ELFT>
+class ElfLSegmentNoteIteratorImpl : public ElfLNoteIteratorImpl<ELFT> {
+  using Elf_Phdr = typename ELFT::Phdr;
+  using Elf_Phdr_Range = typename ELFT::PhdrRange;
+  using NoteIterator = typename ElfLNoteIteratorImpl<ELFT>::NoteIterator;
+  using SegmentIteratorTy = typename Elf_Phdr_Range::iterator;
+
+  SegmentIteratorTy SegmentsIt;
+
+  const ELFFile<ELFT> &getEF() const { return this->EF; }
+  const NoteIterator &getNotesIt() const { return this->NotesIt; }
+  Error &getErr() const { return this->Err; }
+  NoteIterator &getNotesIt() { return this->NotesIt; }
+  SegmentIteratorTy segment_begin() const {
+    Expected<Elf_Phdr_Range> Segments = getEF().program_headers();
+    if (!Segments)
+      return SegmentIteratorTy();
+
+    return Segments->begin();
+  }
+
+  SegmentIteratorTy segment_end() const {
+    Expected<Elf_Phdr_Range> Segments = getEF().program_headers();
+    if (!Segments)
+      return SegmentIteratorTy();
+
+    return Segments->end();
+  }
+
+  bool isEqual(const ElfLSegmentNoteIteratorImpl &Lhs,
+               const ElfLSegmentNoteIteratorImpl &Rhs) const {
+    // Check for end() iterators, first.
+    if (Lhs.SegmentsIt == segment_end() && Rhs.SegmentsIt == segment_end())
+      return true;
+
+    if (Lhs.SegmentsIt != Rhs.SegmentsIt)
+      return false;
+
+    return Lhs.getNotesIt() == Rhs.getNotesIt();
+  }
+
+  void autoAdvance(bool IsFirst = false) {
+    if (SegmentsIt == segment_end())
+      return;
+
+    if (getNotesIt() != getEF().notes_end())
+      return;
+
+    // SegmentsIt is not an end iterator, and NotesIt is an end()
+    // iterator.
+    //
+    // If IsFirst is true, then we are just in the initial state, and
+    // we need to set SegmentsIt to the first PT_NOTE segment (if any),
+    // and, then, NotesIt to the first note in this segment.
+    //
+    // If IsFirst is false, then we've reached the end of the current
+    // PT_NOTE segment, and should find the next segment with notes.
+    if (!IsFirst || SegmentsIt->p_type != ELF::PT_NOTE)
+      ++SegmentsIt;
+
+    while (SegmentsIt != segment_end() && SegmentsIt->p_type != ELF::PT_NOTE) {
+      ++SegmentsIt;
+    }
+
+    if (SegmentsIt == segment_end()) {
+      // No more segments.
+      return;
+    }
+
+    const Elf_Phdr &Segment = *SegmentsIt;
+    getNotesIt() = getEF().notes_begin(Segment, getErr());
+
+    // Auto advance the iterator, if the NOTE segment
+    // does not contain any notes (e.g. some error happened
+    // during the note parsing).
+    autoAdvance();
+  }
+
+  bool operator!=(const ElfLSegmentNoteIteratorImpl &Other) const {
+    return !(*this == Other);
+  }
+
+public:
+  ElfLSegmentNoteIteratorImpl(const ELFFile<ELFT> &EF, Error &Err,
+                              bool IsEnd = false)
+      : ElfLNoteIteratorImpl<ELFT>(EF, Err, false) {
+    if (IsEnd) {
+      SegmentsIt = segment_end();
+      // It is an end() iterator, if SegmentsIt is an end() iterator.
+ return; + } + + SegmentsIt = segment_begin(); + autoAdvance(true); + } + + ElfLSegmentNoteIteratorImpl(const ElfLSegmentNoteIteratorImpl &Copy) = + default; + + ElfLNoteIteratorImplBase *clone() const override { + return new ElfLSegmentNoteIteratorImpl(*this); + } + + bool operator==(const ElfLNoteIteratorImplBase &Other) const override { + if (const ElfLSegmentNoteIteratorImpl *OPtr = + dyn_cast(&Other)) { + return isEqual(*this, *OPtr); + } + return false; + } + + ElfLSegmentNoteIteratorImpl &operator++() override { + assert(*this != ElfLSegmentNoteIteratorImpl(getEF(), getErr(), true) && + "Incrementing the end iterator."); + // Move the notes iterator within the current segment. + ++getNotesIt(); + autoAdvance(); + + return *this; + } + + ElfLNoteImplBase *operator*() const override { + assert(*this != ElfLSegmentNoteIteratorImpl(getEF(), getErr(), true) && + "Dereferencing the end iterator."); + return new ElfLNoteImpl(*getNotesIt()); + } + + static bool classof(const ElfLNoteIteratorImplBase *B) { + return (ElfLNoteIteratorImpl::classof(B) && + B->isSectionIterator() == false); + } +}; + +class ElfLSectionImplBase { +public: + virtual ~ElfLSectionImplBase() = default; + virtual ElfLSectionImplBase *clone() const = 0; + virtual const char *getName() const = 0; + virtual uint64_t getSize() const = 0; + virtual const uint8_t *getContents() const = 0; +}; + +template class ElfLSectionImpl : public ElfLSectionImplBase { + using Elf_Shdr = typename ELFT::Shdr; + + const ELFFile &EF; + const Elf_Shdr &Section; + +public: + ElfLSectionImpl(const ELFFile &EF, const Elf_Shdr &Section) + : EF(EF), Section(Section) {} + ElfLSectionImpl(const ElfLSectionImpl &) = default; + ElfLSectionImpl *clone() const override { return new ElfLSectionImpl(*this); } + ~ElfLSectionImpl() = default; + + const char *getName() const override { + Expected NameOrErr = EF.getSectionName(Section); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + return ""; + } + return NameOrErr->data(); + } + + uint64_t getSize() const override { + Expected> ContentsOrErr = EF.getSectionContents(Section); + if (!ContentsOrErr) { + consumeError(ContentsOrErr.takeError()); + return 0; + } + return ContentsOrErr->size(); + } + + const uint8_t *getContents() const override { + Expected> ContentsOrErr = EF.getSectionContents(Section); + if (!ContentsOrErr) { + consumeError(ContentsOrErr.takeError()); + return 0; + } + return ContentsOrErr->data(); + } +}; + +class ElfLSectionIteratorImplBase { +protected: + const endianness TargetEndianness; + const bool Is64Bits; + + ElfLSectionIteratorImplBase(endianness TargetEndianness, bool Is64Bits) + : TargetEndianness(TargetEndianness), Is64Bits(Is64Bits) {} + +public: + ElfLSectionIteratorImplBase(const ElfLSectionIteratorImplBase &) = default; + virtual ~ElfLSectionIteratorImplBase() = default; + virtual ElfLSectionIteratorImplBase *clone() const = 0; + virtual ElfLSectionIteratorImplBase &operator++() = 0; + virtual bool operator==(const ElfLSectionIteratorImplBase &) const = 0; + virtual ElfLSectionImplBase *operator*() const = 0; + + endianness getEndianness() const { return TargetEndianness; } + + bool is64Bits() const { return Is64Bits; } +}; + +template +class ElfLSectionIteratorImpl : public ElfLSectionIteratorImplBase { + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Shdr_Range = typename ELFT::ShdrRange; + using SectionsIteratorTy = typename Elf_Shdr_Range::iterator; + + const ELFFile &EF; + SectionsIteratorTy SectionsIt; + + const ELFFile &getEF() const { return EF; } 
+ + SectionsIteratorTy section_begin() const { + Expected Sections = getEF().sections(); + if (!Sections) + return SectionsIteratorTy(); + + return Sections->begin(); + } + + SectionsIteratorTy section_end() const { + Expected Sections = getEF().sections(); + if (!Sections) + return SectionsIteratorTy(); + + return Sections->end(); + } + + bool isEqual(const ElfLSectionIteratorImpl &Lhs, + const ElfLSectionIteratorImpl &Rhs) const { + return Lhs.SectionsIt == Rhs.SectionsIt; + } + + bool operator!=(const ElfLSectionIteratorImpl Other) const { + return !(*this == Other); + } + +public: + ElfLSectionIteratorImpl(const ELFFile &EF, bool IsEnd = false) + : ElfLSectionIteratorImplBase(ELFT::TargetEndianness, ELFT::Is64Bits), + EF(EF) { + if (IsEnd) { + SectionsIt = section_end(); + return; + } + + SectionsIt = section_begin(); + } + + ElfLSectionIteratorImpl *clone() const override { + return new ElfLSectionIteratorImpl(*this); + } + + bool operator==(const ElfLSectionIteratorImplBase &Other) const override { + if (const ElfLSectionIteratorImpl *OPtr = + dyn_cast(&Other)) { + return isEqual(*this, *OPtr); + } + return false; + } + + ElfLSectionImplBase *operator*() const override { + assert(*this != ElfLSectionIteratorImpl(EF, true) && + "Dereferencing the end iterator."); + return new ElfLSectionImpl(EF, *SectionsIt); + } + + ElfLSectionIteratorImpl &operator++() override { + assert(*this != ElfLSectionIteratorImpl(EF, true) && + "Dereferencing the end iterator."); + + ++SectionsIt; + return *this; + } + + static bool classof(const ElfLSectionIteratorImplBase *B) { + return (B->getEndianness() == ELFT::TargetEndianness && + B->is64Bits() == ELFT::Is64Bits); + } +}; + +class ElfLImplBase { +public: + ElfLImplBase() = default; + ElfLImplBase(const ElfLImplBase &) = delete; + ElfLImplBase &operator=(const ElfLImplBase &) = delete; + virtual ~ElfLImplBase() = default; + virtual uint16_t getEMachine() const = 0; + virtual uint16_t getEType() const = 0; + + virtual ElfLNoteIteratorImplBase * + createSectionNoteIteratorImpl(bool IsEnd) const = 0; + virtual ElfLNoteIteratorImplBase * + createSegmentNoteIteratorImpl(bool IsEnd) const = 0; + virtual ElfLSectionIteratorImplBase * + createSectionIteratorImpl(bool IsEnd) const = 0; +}; + +template class ElfLImpl : public ElfLImplBase { + std::unique_ptr> File; + Error *Err = nullptr; + + friend class ElfL; + +public: + ElfLImpl(std::unique_ptr F) { + ObjectFile *FPtr = F.release(); + if (auto *Obj = dyn_cast>(FPtr)) + File = std::unique_ptr>(Obj); + else + assert(false && "Not an ELF object file, or ELF class is wrong."); + + Err = new Error(std::move(Error::success())); + } + ElfLImpl(const ElfLImpl &) = delete; + ElfLImpl &operator=(const ElfLImpl &) = delete; + virtual ~ElfLImpl() { + if (!Err) + return; + + if (*Err) { + auto ErrorString = toString(std::move(*Err)); + DP("Destroying ELF object parsed with errors: %s\n", ErrorString.c_str()); + } else { + delete Err; + } + Err = nullptr; + } + uint16_t getEMachine() const override { + return cast(File.get())->getEMachine(); + } + uint16_t getEType() const override { + return cast(File.get())->getEType(); + } + + ElfLNoteIteratorImplBase * + createSectionNoteIteratorImpl(bool IsEnd) const override { + return new ElfLSectionNoteIteratorImpl(File->getELFFile(), *Err, + IsEnd); + } + + ElfLNoteIteratorImplBase * + createSegmentNoteIteratorImpl(bool IsEnd) const override { + return new ElfLSegmentNoteIteratorImpl(File->getELFFile(), *Err, + IsEnd); + } + + ElfLSectionIteratorImplBase * + 
createSectionIteratorImpl(bool IsEnd) const override {
+    return new ElfLSectionIteratorImpl(File->getELFFile(), IsEnd);
+  }
+};
+
+ElfL::ElfL(char *Begin, size_t Size) {
+  StringRef StrBuf(Begin, Size);
+  std::unique_ptr<MemoryBuffer> MemBuf =
+      MemoryBuffer::getMemBuffer(StrBuf, "", false);
+  Expected<std::unique_ptr<ObjectFile>> BinOrErr =
+      ObjectFile::createELFObjectFile(MemBuf->getMemBufferRef(),
+                                      /*InitContent=*/false);
+  if (!BinOrErr) {
+    consumeError(BinOrErr.takeError());
+    return;
+  }
+
+  if (isa<ELF64LEObjectFile>(BinOrErr->get())) {
+    Impl = reinterpret_cast<void *>(
+        new ElfLImpl<ELF64LE>(std::move(*BinOrErr)));
+  } else if (isa<ELF32LEObjectFile>(BinOrErr->get()))
+    Impl = reinterpret_cast<void *>(
+        new ElfLImpl<ELF32LE>(std::move(*BinOrErr)));
+  else if (isa<ELF64BEObjectFile>(BinOrErr->get()))
+    Impl = reinterpret_cast<void *>(
+        new ElfLImpl<ELF64BE>(std::move(*BinOrErr)));
+  else if (isa<ELF32BEObjectFile>(BinOrErr->get()))
+    Impl = reinterpret_cast<void *>(
+        new ElfLImpl<ELF32BE>(std::move(*BinOrErr)));
+}
+
+ElfL::~ElfL() {
+  ElfLImplBase *EImpl = reinterpret_cast<ElfLImplBase *>(Impl);
+  delete EImpl;
+}
+
+bool ElfL::isValidElf() const { return Impl; }
+
+const char *ElfL::getErrmsg(int /*N*/) const {
+  // TODO: return text representation for the latest Error.
+  return "LLVM ELF error";
+}
+
+uint16_t ElfL::getEMachine() const {
+  assert(isValidElf() && "Invalid ELF.");
+  ElfLImplBase *EImpl = reinterpret_cast<ElfLImplBase *>(Impl);
+  return EImpl->getEMachine();
+}
+
+uint16_t ElfL::getEType() const {
+  assert(isValidElf() && "Invalid ELF.");
+  ElfLImplBase *EImpl = reinterpret_cast<ElfLImplBase *>(Impl);
+  return EImpl->getEType();
+}
+
+bool ElfL::isDynType(uint16_t Ty) { return Ty == ET_DYN; }
+
+ElfLSectionNoteIterator::ElfLSectionNoteIterator(const void *I, bool IsEnd) {
+  const ElfLImplBase *EImpl = reinterpret_cast<const ElfLImplBase *>(I);
+  // Create new ElfLSectionNoteIteratorImpl object.
+  Impl = EImpl->createSectionNoteIteratorImpl(IsEnd);
+}
+
+ElfLSectionNoteIterator::~ElfLSectionNoteIterator() {
+  const ElfLNoteIteratorImplBase *IImpl =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Impl);
+  delete IImpl;
+}
+
+ElfLSectionNoteIterator::ElfLSectionNoteIterator(
+    const ElfLSectionNoteIterator &Other) {
+  const ElfLNoteIteratorImplBase *IImpl =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Other.Impl);
+  Impl = IImpl->clone();
+}
+
+bool ElfLSectionNoteIterator::operator==(
+    const ElfLSectionNoteIterator Other) const {
+  const ElfLNoteIteratorImplBase *Lhs =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Impl);
+  const ElfLNoteIteratorImplBase *Rhs =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Other.Impl);
+  return (*Lhs == *Rhs);
+}
+
+bool ElfLSectionNoteIterator::operator!=(
+    const ElfLSectionNoteIterator Other) const {
+  return !(*this == Other);
+}
+
+ElfLSectionNoteIterator &ElfLSectionNoteIterator::operator++() {
+  ElfLNoteIteratorImplBase *EImpl =
+      reinterpret_cast<ElfLNoteIteratorImplBase *>(Impl);
+  ++(*EImpl);
+  return *this;
+}
+
+ElfLNote ElfLSectionNoteIterator::operator*() const { return ElfLNote(Impl); }
+
+ElfLSectionNoteIterator ElfL::section_notes_begin() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSectionNoteIterator(reinterpret_cast<const void *>(Impl));
+}
+
+ElfLSectionNoteIterator ElfL::section_notes_end() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSectionNoteIterator(reinterpret_cast<const void *>(Impl),
+                                 true);
+}
+
+ElfLSegmentNoteIterator::ElfLSegmentNoteIterator(const void *I, bool IsEnd) {
+  const ElfLImplBase *EImpl = reinterpret_cast<const ElfLImplBase *>(I);
+  // Create new ElfLSegmentNoteIteratorImpl object.
+  Impl = EImpl->createSegmentNoteIteratorImpl(IsEnd);
+}
+
+ElfLSegmentNoteIterator::~ElfLSegmentNoteIterator() {
+  const ElfLNoteIteratorImplBase *IImpl =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Impl);
+  delete IImpl;
+}
+
+ElfLSegmentNoteIterator::ElfLSegmentNoteIterator(
+    const ElfLSegmentNoteIterator &Other) {
+  const ElfLNoteIteratorImplBase *IImpl =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Other.Impl);
+  Impl = IImpl->clone();
+}
+
+bool ElfLSegmentNoteIterator::operator==(
+    const ElfLSegmentNoteIterator Other) const {
+  const ElfLNoteIteratorImplBase *Lhs =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Impl);
+  const ElfLNoteIteratorImplBase *Rhs =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(Other.Impl);
+  return (*Lhs == *Rhs);
+}
+
+bool ElfLSegmentNoteIterator::operator!=(
+    const ElfLSegmentNoteIterator Other) const {
+  return !(*this == Other);
+}
+
+ElfLSegmentNoteIterator &ElfLSegmentNoteIterator::operator++() {
+  ElfLNoteIteratorImplBase *EImpl =
+      reinterpret_cast<ElfLNoteIteratorImplBase *>(Impl);
+  ++(*EImpl);
+  return *this;
+}
+
+ElfLNote ElfLSegmentNoteIterator::operator*() const { return ElfLNote(Impl); }
+
+ElfLSegmentNoteIterator ElfL::segment_notes_begin() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSegmentNoteIterator(reinterpret_cast<const void *>(Impl));
+}
+
+ElfLSegmentNoteIterator ElfL::segment_notes_end() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSegmentNoteIterator(reinterpret_cast<const void *>(Impl),
+                                 true);
+}
+
+ElfLNote::ElfLNote(const void *IteratorImpl) {
+  const ElfLNoteIteratorImplBase *IImpl =
+      reinterpret_cast<const ElfLNoteIteratorImplBase *>(IteratorImpl);
+  Impl = **IImpl;
+}
+
+ElfLNote::ElfLNote(const ElfLNote &Other) {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Other.Impl);
+  if (NImpl)
+    Impl = NImpl->clone();
+}
+
+ElfLNote::~ElfLNote() {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  delete NImpl;
+}
+
+uint64_t ElfLNote::getNameSize() const {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  return NImpl->getNameSize();
+}
+
+const char *ElfLNote::getName() const {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  return NImpl->getName();
+}
+
+uint64_t ElfLNote::getDescSize() const {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  return NImpl->getDescSize();
+}
+
+const uint8_t *ElfLNote::getDesc() const {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  return NImpl->getDesc();
+}
+
+uint64_t ElfLNote::getType() const {
+  const ElfLNoteImplBase *NImpl =
+      reinterpret_cast<const ElfLNoteImplBase *>(Impl);
+  return NImpl->getType();
+}
+
+ElfLSection::ElfLSection(const void *I) {
+  Impl = I;
+}
+
+ElfLSection::ElfLSection(const ElfLSection &Other) {
+  const ElfLSectionImplBase *SImpl =
+      reinterpret_cast<const ElfLSectionImplBase *>(Other.Impl);
+  Impl = SImpl->clone();
+}
+
+ElfLSection::~ElfLSection() {
+  const ElfLSectionImplBase *SImpl =
+      reinterpret_cast<const ElfLSectionImplBase *>(Impl);
+  delete SImpl;
+}
+
+const char *ElfLSection::getName() const {
+  const ElfLSectionImplBase *SImpl =
+      reinterpret_cast<const ElfLSectionImplBase *>(Impl);
+  return SImpl->getName();
+}
+
+uint64_t ElfLSection::getSize() const {
+  const ElfLSectionImplBase *SImpl =
+      reinterpret_cast<const ElfLSectionImplBase *>(Impl);
+  return SImpl->getSize();
+}
+
+const uint8_t *ElfLSection::getContents() const {
+  const ElfLSectionImplBase *SImpl =
+      reinterpret_cast<const ElfLSectionImplBase *>(Impl);
+  return SImpl->getContents();
+}
+
+ElfLSectionIterator ElfL::sections_begin() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSectionIterator(reinterpret_cast<const void *>(Impl));
+}
+
+ElfLSectionIterator ElfL::sections_end() const {
+  assert(isValidElf() && "Invalid ELF.");
+  return ElfLSectionIterator(reinterpret_cast<const void *>(Impl),
+                             true);
+}
+
+ElfLSectionIterator::ElfLSectionIterator(const void *I, bool IsEnd) {
+  const ElfLImplBase *EImpl = reinterpret_cast<const ElfLImplBase *>(I);
+  Impl = EImpl->createSectionIteratorImpl(IsEnd);
+}
+
+ElfLSectionIterator::ElfLSectionIterator(
+    const ElfLSectionIterator &Other) {
+  ElfLSectionIteratorImplBase *IImpl =
+      reinterpret_cast<ElfLSectionIteratorImplBase *>(Other.Impl);
+  Impl = IImpl->clone();
+}
+
+ElfLSectionIterator::~ElfLSectionIterator() {
+  ElfLSectionIteratorImplBase *IImpl =
+      reinterpret_cast<ElfLSectionIteratorImplBase *>(Impl);
+  delete IImpl;
+}
+
+bool ElfLSectionIterator::operator==(
+    const ElfLSectionIterator Other) const {
+  const ElfLSectionIteratorImplBase *Lhs =
+      reinterpret_cast<const ElfLSectionIteratorImplBase *>(Impl);
+  const ElfLSectionIteratorImplBase *Rhs =
+      reinterpret_cast<const ElfLSectionIteratorImplBase *>(Other.Impl);
+  return (*Lhs == *Rhs);
+}
+
+bool ElfLSectionIterator::operator!=(
+    const ElfLSectionIterator Other) const {
+  return !(*this == Other);
+}
+
+ElfLSectionIterator &ElfLSectionIterator::operator++() {
+  ElfLSectionIteratorImplBase *IImpl =
+      reinterpret_cast<ElfLSectionIteratorImplBase *>(Impl);
+  ++(*IImpl);
+  return *this;
+}
+
+ElfLSection ElfLSectionIterator::operator*() const {
+  ElfLSectionIteratorImplBase *IImpl =
+      reinterpret_cast<ElfLSectionIteratorImplBase *>(Impl);
+  return ElfLSection(**IImpl);
+}
+#endif // !MAY_USE_LIBELF
diff --git a/openmp/libomptarget/plugins/level0/CMakeLists.txt b/openmp/libomptarget/plugins/level0/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins/level0/CMakeLists.txt
@@ -0,0 +1,67 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Plugin for Level0 devices
+#
+##===----------------------------------------------------------------------===##
+
+if(LIBOMPTARGET_DEP_LEVEL0_FOUND)
+  if((CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)$" OR
+      CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") AND
+     (CMAKE_SYSTEM_NAME MATCHES "Linux" OR
+      CMAKE_SYSTEM_NAME MATCHES "Windows"))
+
+    libomptarget_say("Building Level0 offloading plugin.")
+
+    # Define the suffix for the runtime messaging dumps.
+    #add_definitions(-DTARGET_NAME=LEVEL0)
+    #add_definitions(-DTARGET_LEVEL0_VERSION=?)
+
+    if(LIBOMPTARGET_CMAKE_BUILD_TYPE MATCHES debug)
+      add_definitions(-DOMPTARGET_LEVEL0_DEBUG)
+    endif()
+
+    add_library(omptarget.rtl.level0 SHARED src/rtl.cpp)
+    target_include_directories(omptarget.rtl.level0 PRIVATE
+      ${LIBOMPTARGET_INCLUDE_DIR}
+      ${LIBOMPTARGET_DEP_LEVEL0_INCLUDE_DIRS}
+      ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
+    )
+
+    if(NOT OPENMP_STANDALONE_BUILD)
+      # We have to disable EH for Windows compilation.
+      # For standalone OpenMP build, we need to come up
+      # with our own EH flags management.
+      llvm_update_compile_flags(omptarget.rtl.level0)
+    endif()
+
+    install(TARGETS omptarget.rtl.level0
+      LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+
+    target_link_libraries(omptarget.rtl.level0 PRIVATE
+      elf_common
+      LLVMSupport)
+
+    if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+      target_link_libraries(omptarget.rtl.level0 PRIVATE
+        ${LIBOMPTARGET_DEP_LEVEL0_LIBRARIES} ${LIBOMP_LIB_FILE} dl
+        ${OPENMP_PTHREAD_LIB}
+        "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
+    else()
+      message(FATAL_ERROR "Missing platform support.")
+    endif()
+
+    # Report to the parent scope that we are building a plugin for Level0.
+    set(LIBOMPTARGET_SYSTEM_TARGETS
+      "${LIBOMPTARGET_SYSTEM_TARGETS} spir64-unknown-unknown" PARENT_SCOPE)
+  else()
+    libomptarget_say("Not building Level0 offloading plugin: Level0 is only supported on x86_64 Linux/Windows hosts.")
+  endif()
+else()
+  libomptarget_say("Not building Level0 offloading plugin: Level0 not found in the system.")
+endif()
diff --git a/openmp/libomptarget/plugins/level0/src/rtl-trace.h b/openmp/libomptarget/plugins/level0/src/rtl-trace.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins/level0/src/rtl-trace.h
@@ -0,0 +1,979 @@
+//===--- Target RTLs Implementation ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Tracing support for the Level Zero API calls made by the RTL.
+//
+//===----------------------------------------------------------------------===//
+#ifndef RTL_TRACE_H
+#define RTL_TRACE_H
+
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include <level_zero/ze_api.h>
+
+#include "omptarget.h"
+#include "Debug.h"
+
+#define STR(x) #x
+#define TO_STRING(x) STR(x)
+
+#define TARGET_NAME LEVEL0
+#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
+
+extern int DebugLevel;
+
+#define DPCALL(...)                                                            \
+  do {                                                                         \
+    if (DebugLevel > 1)                                                        \
+      DP(__VA_ARGS__);                                                         \
+  } while (0)
+
+#define DPI(...)
+
+#define FATAL_ERROR(Msg)                                                       \
+  do {                                                                         \
+    fprintf(stderr, "%s --> ", DEBUG_PREFIX);                                  \
+    fprintf(stderr, "Error: %s failed (%s) -- exiting...\n", __func__, Msg);   \
+    exit(EXIT_FAILURE);                                                        \
+  } while (0)
+
+#define WARNING(...)                                                           \
+  do {                                                                         \
+    fprintf(stderr, "%s --> ", DEBUG_PREFIX);                                  \
+    fprintf(stderr, "Warning: " __VA_ARGS__);                                  \
+  } while (0)
+
+///
+/// Wrappers for tracing ze API calls.
+/// + +#define TRACE_FN(Name) L0TR##Name +#define TRACE_FN_DEF(Name) ze_result_t TRACE_FN(Name) + +#define TRACE_FN_ARG_BEGIN() \ + do { \ + std::string fn(__func__); \ + DPCALL("ZE_CALLEE: %s (\n", fn.substr(4).c_str()); \ + } while (0) + +#define TRACE_FN_ARG_END() DPCALL(")\n") +#define TRACE_FN_ARG(Arg, Fmt) DPCALL(" %s = " Fmt "\n", TO_STRING(Arg), Arg) +#define TRACE_FN_ARG_PTR(Arg) \ + DPCALL(" %s = " DPxMOD "\n", TO_STRING(Arg), DPxPTR(Arg)) +#define TRACE_FN_ARG_UINT(Arg) TRACE_FN_ARG(Arg, "%" PRIu32) +#define TRACE_FN_ARG_UINT64(Arg) TRACE_FN_ARG(Arg, "%" PRIu64) +#define TRACE_FN_ARG_SIZE(Arg) TRACE_FN_ARG(Arg, "%zu") + +TRACE_FN_DEF(zeCommandListAppendBarrier)( + ze_command_list_handle_t hCommandList, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + auto rc = zeCommandListAppendBarrier(hCommandList, hSignalEvent, + numWaitEvents, phWaitEvents); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_PTR(hSignalEvent); + TRACE_FN_ARG_UINT(numWaitEvents); + TRACE_FN_ARG_PTR(phWaitEvents); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListAppendLaunchKernel)( + ze_command_list_handle_t hCommandList, + ze_kernel_handle_t hKernel, + const ze_group_count_t *pLaunchFuncArgs, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + auto rc = zeCommandListAppendLaunchKernel(hCommandList, hKernel, + pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_PTR(pLaunchFuncArgs); + TRACE_FN_ARG_PTR(hSignalEvent); + TRACE_FN_ARG_UINT(numWaitEvents); + TRACE_FN_ARG_PTR(phWaitEvents); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListAppendMemoryCopy)( + ze_command_list_handle_t hCommandList, + void *dstptr, + const void *srcptr, + size_t size, + ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + auto rc = zeCommandListAppendMemoryCopy(hCommandList, dstptr, srcptr, size, + hEvent, numWaitEvents, phWaitEvents); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_PTR(dstptr); + TRACE_FN_ARG_PTR(srcptr); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_PTR(hEvent); + TRACE_FN_ARG_UINT(numWaitEvents); + TRACE_FN_ARG_PTR(phWaitEvents); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListAppendMemoryPrefetch)( + ze_command_list_handle_t hCommandList, + const void *ptr, + size_t size) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_PTR(ptr); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_END(); + + return zeCommandListAppendMemoryPrefetch(hCommandList, ptr, size); +} + +TRACE_FN_DEF(zeCommandListAppendMemAdvise)( + ze_command_list_handle_t hCommandList, + ze_device_handle_t hDevice, + const void *ptr, + size_t size, + ze_memory_advice_t advice) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(ptr); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_UINT(advice); + + return zeCommandListAppendMemAdvise(hCommandList, hDevice, ptr, size, advice); +} + +TRACE_FN_DEF(zeCommandListClose)( + ze_command_list_handle_t hCommandList) { + auto rc = zeCommandListClose(hCommandList); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListCreate)( + ze_context_handle_t hContext, + ze_device_handle_t hDevice, + const ze_command_list_desc_t *desc, + 
ze_command_list_handle_t *phCommandList) { + auto rc = zeCommandListCreate(hContext, hDevice, desc, phCommandList); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phCommandList); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListCreateImmediate)( + ze_context_handle_t hContext, + ze_device_handle_t hDevice, + const ze_command_queue_desc_t *altdesc, + ze_command_list_handle_t *phCommandList) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(altdesc); + TRACE_FN_ARG_PTR(phCommandList); + TRACE_FN_ARG_END(); + return zeCommandListCreateImmediate(hContext, hDevice, altdesc, + phCommandList); +} + +TRACE_FN_DEF(zeCommandListDestroy)( + ze_command_list_handle_t hCommandList) { + auto rc = zeCommandListDestroy(hCommandList); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandListReset)( + ze_command_list_handle_t hCommandList) { + auto rc = zeCommandListReset(hCommandList); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandList); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandQueueCreate)( + ze_context_handle_t hContext, + ze_device_handle_t hDevice, + const ze_command_queue_desc_t *desc, + ze_command_queue_handle_t *phCommandQueue) { + auto rc = zeCommandQueueCreate(hContext, hDevice, desc, phCommandQueue); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phCommandQueue); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandQueueDestroy)( + ze_command_queue_handle_t hCommandQueue) { + auto rc = zeCommandQueueDestroy(hCommandQueue); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandQueue); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandQueueExecuteCommandLists)( + ze_command_queue_handle_t hCommandQueue, + uint32_t numCommandLists, + ze_command_list_handle_t *phCommandLists, + ze_fence_handle_t hFence) { + auto rc = zeCommandQueueExecuteCommandLists(hCommandQueue, numCommandLists, + phCommandLists, hFence); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandQueue); + TRACE_FN_ARG_UINT(numCommandLists); + TRACE_FN_ARG_PTR(phCommandLists); + TRACE_FN_ARG_PTR(hFence); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeCommandQueueSynchronize)( + ze_command_queue_handle_t hCommandQueue, + uint64_t timeout) { + auto rc = zeCommandQueueSynchronize(hCommandQueue, timeout); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandQueue); + TRACE_FN_ARG_UINT64(timeout); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeContextCreate)( + ze_driver_handle_t hDriver, + const ze_context_desc_t *desc, + ze_context_handle_t *phContext) { + auto rc = zeContextCreate(hDriver, desc, phContext); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDriver); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phContext); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeContextDestroy)( + ze_context_handle_t hContext) { + auto rc = zeContextDestroy(hContext); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceCanAccessPeer)( + ze_device_handle_t hDevice, + ze_device_handle_t hPeerDevice, + ze_bool_t *value) { + auto rc = zeDeviceCanAccessPeer(hDevice, hPeerDevice, value); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(hPeerDevice); + TRACE_FN_ARG_PTR(value); + TRACE_FN_ARG_END(); + return rc; +} 
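
Each wrapper above performs the real Level Zero call first and then, when DebugLevel exceeds 1, dumps the callee's arguments through DPCALL. Assuming DP() from Debug.h prints the usual "Target LEVEL0 RTL --> " prefix, a traced zeDeviceCanAccessPeer call would emit output roughly like the following (handle values are illustrative):

  Target LEVEL0 RTL --> ZE_CALLEE: zeDeviceCanAccessPeer (
  Target LEVEL0 RTL -->   hDevice = 0x00007f32e4c0a010
  Target LEVEL0 RTL -->   hPeerDevice = 0x00007f32e4c0a330
  Target LEVEL0 RTL -->   value = 0x00007ffcc34d5e88
  Target LEVEL0 RTL --> )
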
+ +TRACE_FN_DEF(zeDeviceGet)( + ze_driver_handle_t hDriver, + uint32_t *pCount, + ze_device_handle_t *phDevices) { + auto rc = zeDeviceGet(hDriver, pCount, phDevices); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDriver); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(phDevices); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceGetProperties)( + ze_device_handle_t hDevice, + ze_device_properties_t *pDeviceProperties) { + auto rc = zeDeviceGetProperties(hDevice, pDeviceProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pDeviceProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceGetCommandQueueGroupProperties)( + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { + auto rc = zeDeviceGetCommandQueueGroupProperties( + hDevice, pCount, pCommandQueueGroupProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(pCommandQueueGroupProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceGetComputeProperties)( + ze_device_handle_t hDevice, + ze_device_compute_properties_t *pComputeProperties) { + auto rc = zeDeviceGetComputeProperties(hDevice, pComputeProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pComputeProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceGetSubDevices)( + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_device_handle_t *phSubdevices) { + auto rc = zeDeviceGetSubDevices(hDevice, pCount, phSubdevices); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(phSubdevices); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDeviceGetMemoryProperties)( + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_device_memory_properties_t *pMemProperties) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(pMemProperties); + TRACE_FN_ARG_END(); + return zeDeviceGetMemoryProperties(hDevice, pCount, pMemProperties); +} + +TRACE_FN_DEF(zeDeviceGetCacheProperties)( + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_device_cache_properties_t *pCacheProperties) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(pCacheProperties); + TRACE_FN_ARG_END(); + return zeDeviceGetCacheProperties(hDevice, pCount, pCacheProperties); +} + +TRACE_FN_DEF(zeDriverGet)( + uint32_t *pCount, + ze_driver_handle_t *phDrivers) { + auto rc = zeDriverGet(pCount, phDrivers); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(phDrivers); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDriverGetApiVersion)( + ze_driver_handle_t hDriver, + ze_api_version_t *version) { + auto rc = zeDriverGetApiVersion(hDriver, version); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDriver); + TRACE_FN_ARG_PTR(version); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDriverGetExtensionFunctionAddress)( + ze_driver_handle_t hDriver, + const char *name, + void **ppFunctionAddress) { + auto rc = zeDriverGetExtensionFunctionAddress( + hDriver, name, ppFunctionAddress); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDriver); + TRACE_FN_ARG_PTR(name); + TRACE_FN_ARG_PTR(ppFunctionAddress); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeDriverGetExtensionProperties)( + ze_driver_handle_t hDriver, + uint32_t *pCount, + ze_driver_extension_properties_t *pExtensionProperties) { + auto rc = 
zeDriverGetExtensionProperties(hDriver, pCount, + pExtensionProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hDriver); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(pExtensionProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventCreate)( + ze_event_pool_handle_t hEventPool, + const ze_event_desc_t *desc, + ze_event_handle_t *phEvent) { + auto rc = zeEventCreate(hEventPool, desc, phEvent); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEventPool); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phEvent); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventDestroy)( + ze_event_handle_t hEvent) { + auto rc = zeEventDestroy(hEvent); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEvent); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventHostReset)( + ze_event_handle_t hEvent) { + auto rc = zeEventHostReset(hEvent); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEvent); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventHostSynchronize)( + ze_event_handle_t hEvent, + uint64_t timeout) { + auto rc = zeEventHostSynchronize(hEvent, timeout); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEvent); + TRACE_FN_ARG(timeout, "%" PRIu64); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventPoolCreate)( + ze_context_handle_t hContext, + const ze_event_pool_desc_t *desc, + uint32_t numDevices, + ze_device_handle_t *phDevices, + ze_event_pool_handle_t *phEventPool) { + auto rc = zeEventPoolCreate(hContext, desc, numDevices, phDevices, + phEventPool); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_UINT(numDevices); + TRACE_FN_ARG_PTR(phDevices); + TRACE_FN_ARG_PTR(phEventPool); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventPoolDestroy)( + ze_event_pool_handle_t hEventPool) { + auto rc = zeEventPoolDestroy(hEventPool); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEventPool); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeEventQueryKernelTimestamp)( + ze_event_handle_t hEvent, + ze_kernel_timestamp_result_t *dstptr) { + auto rc = zeEventQueryKernelTimestamp(hEvent, dstptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hEvent); + TRACE_FN_ARG_PTR(dstptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeFenceCreate)( + ze_command_queue_handle_t hCommandQueue, + const ze_fence_desc_t *desc, + ze_fence_handle_t *phFence) { + auto rc = zeFenceCreate(hCommandQueue, desc, phFence); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hCommandQueue); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phFence); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeFenceDestroy)( + ze_fence_handle_t hFence) { + auto rc = zeFenceDestroy(hFence); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hFence); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeFenceHostSynchronize)( + ze_fence_handle_t hFence, + uint64_t timeout) { + auto rc = zeFenceHostSynchronize(hFence, timeout); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hFence); + TRACE_FN_ARG_UINT64(timeout); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeInit)( + ze_init_flag_t flags) { + auto rc = zeInit(flags); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_UINT(flags); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelCreate)( + ze_module_handle_t hModule, + const ze_kernel_desc_t *desc, + ze_kernel_handle_t *phKernel) { + auto rc = zeKernelCreate(hModule, desc, phKernel); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModule); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phKernel); + TRACE_FN_ARG_END(); + return rc; +} + 
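
The zeKernel* and zeCommandList*/zeCommandQueue* wrappers in this header cover the canonical Level Zero kernel-launch path. As a minimal, self-contained sketch of that sequence (the handles hModule, hCmdList, and hCmdQueue are assumed to have been created earlier, the kernel name is a placeholder, and error checking is elided):

  // Minimal Level Zero launch sequence corresponding to the traced calls.
  ze_kernel_desc_t KernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0,
                              "my_kernel" /* placeholder name */};
  ze_kernel_handle_t Kernel;
  zeKernelCreate(hModule, &KernelDesc, &Kernel);

  void *Arg = nullptr; // e.g. a device pointer from zeMemAllocDevice
  zeKernelSetArgumentValue(Kernel, /*argIndex=*/0, sizeof(Arg), &Arg);
  zeKernelSetGroupSize(Kernel, /*X=*/32, /*Y=*/1, /*Z=*/1);

  ze_group_count_t Groups{/*groupCountX=*/128, 1, 1};
  zeCommandListAppendLaunchKernel(hCmdList, Kernel, &Groups, nullptr, 0,
                                  nullptr);
  zeCommandListClose(hCmdList);
  zeCommandQueueExecuteCommandLists(hCmdQueue, 1, &hCmdList, nullptr);
  zeCommandQueueSynchronize(hCmdQueue, UINT64_MAX);
  zeKernelDestroy(Kernel);
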
+TRACE_FN_DEF(zeKernelDestroy)( + ze_kernel_handle_t hKernel) { + auto rc = zeKernelDestroy(hKernel); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelGetName)( + ze_kernel_handle_t hKernel, + size_t *pSize, + char *pName) { + auto rc = zeKernelGetName(hKernel, pSize, pName); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_PTR(pSize); + TRACE_FN_ARG_PTR(pName); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelGetProperties)( + ze_kernel_handle_t hKernel, + ze_kernel_properties_t *pKernelProperties) { + auto rc = zeKernelGetProperties(hKernel, pKernelProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_PTR(pKernelProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelSetArgumentValue)( + ze_kernel_handle_t hKernel, + uint32_t argIndex, + size_t argSize, + const void *pArgValue) { + auto rc = zeKernelSetArgumentValue(hKernel, argIndex, argSize, pArgValue); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_UINT(argIndex); + TRACE_FN_ARG_SIZE(argSize); + TRACE_FN_ARG_PTR(pArgValue); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelSetGroupSize)( + ze_kernel_handle_t hKernel, + uint32_t groupSizeX, + uint32_t groupSizeY, + uint32_t groupSizeZ) { + auto rc = zeKernelSetGroupSize(hKernel, groupSizeX, groupSizeY, groupSizeZ); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_UINT(groupSizeX); + TRACE_FN_ARG_UINT(groupSizeY); + TRACE_FN_ARG_UINT(groupSizeZ); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelSetIndirectAccess)( + ze_kernel_handle_t hKernel, + ze_kernel_indirect_access_flags_t flags) { + auto rc = zeKernelSetIndirectAccess(hKernel, flags); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_UINT(flags); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeKernelSuggestGroupSize)( + ze_kernel_handle_t hKernel, + uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, + uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { + auto rc = zeKernelSuggestGroupSize(hKernel, + globalSizeX, globalSizeY, globalSizeZ, + groupSizeX, groupSizeY, groupSizeZ); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hKernel); + TRACE_FN_ARG_UINT(globalSizeX); + TRACE_FN_ARG_UINT(globalSizeY); + TRACE_FN_ARG_UINT(globalSizeZ); + TRACE_FN_ARG_PTR(groupSizeX); + TRACE_FN_ARG_PTR(groupSizeY); + TRACE_FN_ARG_PTR(groupSizeZ); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemAllocDevice)( + ze_context_handle_t hContext, + const ze_device_mem_alloc_desc_t *device_desc, + size_t size, + size_t alignment, + ze_device_handle_t hDevice, + void **pptr) { + auto rc = zeMemAllocDevice(hContext, device_desc, size, alignment, + hDevice, pptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(device_desc); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_SIZE(alignment); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemAllocHost)( + ze_context_handle_t hContext, + const ze_host_mem_alloc_desc_t *host_desc, + size_t size, + size_t alignment, + void **pptr) { + auto rc = zeMemAllocHost(hContext, host_desc, size, alignment, pptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(host_desc); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_SIZE(alignment); + TRACE_FN_ARG_PTR(pptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemAllocShared)( + ze_context_handle_t 
hContext, + const ze_device_mem_alloc_desc_t *device_desc, + const ze_host_mem_alloc_desc_t *host_desc, + size_t size, + size_t alignment, + ze_device_handle_t hDevice, + void **pptr) { + auto rc = zeMemAllocShared(hContext, device_desc, host_desc, size, + alignment, hDevice, pptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(device_desc); + TRACE_FN_ARG_PTR(host_desc); + TRACE_FN_ARG_SIZE(size); + TRACE_FN_ARG_SIZE(alignment); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(pptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemFree)( + ze_context_handle_t hContext, + void *ptr) { + auto rc = zeMemFree(hContext, ptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(ptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemGetAddressRange)( + ze_context_handle_t hContext, + const void *ptr, + void **pBase, + size_t *pSize) { + auto rc = zeMemGetAddressRange(hContext, ptr, pBase, pSize); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(ptr); + TRACE_FN_ARG_PTR(pBase); + TRACE_FN_ARG_PTR(pSize); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeMemGetAllocProperties)( + ze_context_handle_t hContext, + const void *ptr, + ze_memory_allocation_properties_t *pMemAllocProperties, + ze_device_handle_t *phDevice) { + auto rc = zeMemGetAllocProperties(hContext, ptr, pMemAllocProperties, + phDevice); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(ptr); + TRACE_FN_ARG_PTR(pMemAllocProperties); + TRACE_FN_ARG_PTR(phDevice); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleBuildLogDestroy)( + ze_module_build_log_handle_t hModuleBuildLog) { + auto rc = zeModuleBuildLogDestroy(hModuleBuildLog); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModuleBuildLog); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleBuildLogGetString)( + ze_module_build_log_handle_t hModuleBuildLog, + size_t *pSize, + char *pBuildLog) { + auto rc = zeModuleBuildLogGetString(hModuleBuildLog, pSize, pBuildLog); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModuleBuildLog); + TRACE_FN_ARG_PTR(pSize); + TRACE_FN_ARG_PTR(pBuildLog); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleCreate)( + ze_context_handle_t hContext, + ze_device_handle_t hDevice, + const ze_module_desc_t *desc, + ze_module_handle_t *phModule, + ze_module_build_log_handle_t *phBuildLog) { + auto rc = zeModuleCreate(hContext, hDevice, desc, phModule, phBuildLog); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hContext); + TRACE_FN_ARG_PTR(hDevice); + TRACE_FN_ARG_PTR(desc); + TRACE_FN_ARG_PTR(phModule); + TRACE_FN_ARG_PTR(phBuildLog); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleDestroy)( + ze_module_handle_t hModule) { + auto rc = zeModuleDestroy(hModule); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModule); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleDynamicLink)( + uint32_t numModules, + ze_module_handle_t *phModules, + ze_module_build_log_handle_t *phLinkLog) { + auto rc = zeModuleDynamicLink(numModules, phModules, phLinkLog); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_UINT(numModules); + TRACE_FN_ARG_PTR(phModules); + TRACE_FN_ARG_PTR(phLinkLog); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleGetGlobalPointer)( + ze_module_handle_t hModule, + const char *pGlobalName, + size_t *pSize, + void **pptr) { + auto rc = zeModuleGetGlobalPointer(hModule, pGlobalName, pSize, pptr); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModule); + TRACE_FN_ARG_PTR(pGlobalName); + 
TRACE_FN_ARG_PTR(pSize); + TRACE_FN_ARG_PTR(pptr); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleGetProperties)( + ze_module_handle_t hModule, + ze_module_properties_t *pModuleProperties) { + auto rc = zeModuleGetProperties(hModule, pModuleProperties); + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModule); + TRACE_FN_ARG_PTR(pModuleProperties); + TRACE_FN_ARG_END(); + return rc; +} + +TRACE_FN_DEF(zeModuleGetKernelNames)( + ze_module_handle_t hModule, + uint32_t *pCount, + const char **pNames) { + TRACE_FN_ARG_BEGIN(); + TRACE_FN_ARG_PTR(hModule); + TRACE_FN_ARG_PTR(pCount); + TRACE_FN_ARG_PTR(pNames); + TRACE_FN_ARG_END(); + return zeModuleGetKernelNames(hModule, pCount, pNames); +} + +#define CALL_ZE(Rc, Fn, ...) \ + do { \ + if (DebugLevel > 1) { \ + DPCALL("ZE_CALLER: %s %s\n", TO_STRING(Fn), TO_STRING(( __VA_ARGS__ ))); \ + Rc = TRACE_FN(Fn)(__VA_ARGS__); \ + } else { \ + Rc = Fn(__VA_ARGS__); \ + } \ + } while (0) + +#define CALL_ZE_RC(Rc, Fn, ...) \ + do { \ + CALL_ZE(Rc, Fn, __VA_ARGS__); \ + if (Rc != ZE_RESULT_SUCCESS) { \ + DP("Error: %s:%s failed with error code %d, %s\n", __func__, #Fn, Rc, \ + getZeErrorName(Rc)); \ + } \ + } while(0) + +/// For non-thread-safe functions +#define CALL_ZE_RET_MTX(Ret, Fn, Mtx, ...) \ + do { \ + Mtx.lock(); \ + ze_result_t rc; \ + CALL_ZE(rc, Fn, __VA_ARGS__); \ + Mtx.unlock(); \ + if (rc != ZE_RESULT_SUCCESS) { \ + DP("Error: %s:%s failed with error code %d, %s\n", __func__, #Fn, rc, \ + getZeErrorName(rc)); \ + return Ret; \ + } \ + } while (0) + +#define CALL_ZE_RET_FAIL_MTX(Fn, Mtx, ...) \ + CALL_ZE_RET_MTX(OFFLOAD_FAIL, Fn, Mtx, __VA_ARGS__) +#define CALL_ZE_RET_NULL_MTX(Fn, Mtx, ...) \ + CALL_ZE_RET_MTX(NULL, Fn, Mtx, __VA_ARGS__) +#define CALL_ZE_RET_ZERO_MTX(Fn, Mtx, ...) \ + CALL_ZE_RET_MTX(0, Fn, Mtx, __VA_ARGS__) + +/// For thread-safe functions +#define CALL_ZE_RET(Ret, Fn, ...) \ + do { \ + ze_result_t rc; \ + CALL_ZE(rc, Fn, __VA_ARGS__); \ + if (rc != ZE_RESULT_SUCCESS) { \ + DP("Error: %s:%s failed with error code %d, %s\n", __func__, #Fn, rc, \ + getZeErrorName(rc)); \ + return Ret; \ + } \ + } while (0) + +#define CALL_ZE_RET_FAIL(Fn, ...) CALL_ZE_RET(OFFLOAD_FAIL, Fn, __VA_ARGS__) +#define CALL_ZE_RET_NULL(Fn, ...) CALL_ZE_RET(NULL, Fn, __VA_ARGS__) +#define CALL_ZE_RET_ZERO(Fn, ...) CALL_ZE_RET(0, Fn, __VA_ARGS__) +#define CALL_ZE_RET_VOID(Fn, ...) CALL_ZE_RET(, Fn, __VA_ARGS__) + +#define CALL_ZE_EXIT_FAIL(Fn, ...) 
\ + do { \ + ze_result_t rc; \ + CALL_ZE(rc, Fn, __VA_ARGS__); \ + if (rc != ZE_RESULT_SUCCESS) { \ + DP("Error: %s:%s failed with error code %d, %s\n", __func__, #Fn, rc, \ + getZeErrorName(rc)); \ + std::exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define FOREACH_ZE_ERROR_CODE(Fn) \ + Fn(ZE_RESULT_SUCCESS) \ + Fn(ZE_RESULT_NOT_READY) \ + Fn(ZE_RESULT_ERROR_DEVICE_LOST) \ + Fn(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY) \ + Fn(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) \ + Fn(ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) \ + Fn(ZE_RESULT_ERROR_MODULE_LINK_FAILURE) \ + Fn(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS) \ + Fn(ZE_RESULT_ERROR_NOT_AVAILABLE) \ + Fn(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) \ + Fn(ZE_RESULT_ERROR_UNINITIALIZED) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_VERSION) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) \ + Fn(ZE_RESULT_ERROR_INVALID_ARGUMENT) \ + Fn(ZE_RESULT_ERROR_INVALID_NULL_HANDLE) \ + Fn(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE) \ + Fn(ZE_RESULT_ERROR_INVALID_NULL_POINTER) \ + Fn(ZE_RESULT_ERROR_INVALID_SIZE) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_SIZE) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT) \ + Fn(ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT) \ + Fn(ZE_RESULT_ERROR_INVALID_ENUMERATION) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION) \ + Fn(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT) \ + Fn(ZE_RESULT_ERROR_INVALID_NATIVE_BINARY) \ + Fn(ZE_RESULT_ERROR_INVALID_GLOBAL_NAME) \ + Fn(ZE_RESULT_ERROR_INVALID_KERNEL_NAME) \ + Fn(ZE_RESULT_ERROR_INVALID_FUNCTION_NAME) \ + Fn(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION) \ + Fn(ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION) \ + Fn(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX) \ + Fn(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE) \ + Fn(ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE) \ + Fn(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED) \ + Fn(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE) \ + Fn(ZE_RESULT_ERROR_OVERLAPPING_REGIONS) \ + Fn(ZE_RESULT_ERROR_UNKNOWN) + +#define CASE_TO_STRING(Num) case Num: return #Num; +const char *getZeErrorName(int32_t Error) { + switch (Error) { + FOREACH_ZE_ERROR_CODE(CASE_TO_STRING) + default: + return "ZE_RESULT_ERROR_UNKNOWN"; + } +} + +#endif // !defined(RTL_TRACE_H) diff --git a/openmp/libomptarget/plugins/level0/src/rtl.cpp b/openmp/libomptarget/plugins/level0/src/rtl.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/level0/src/rtl.cpp @@ -0,0 +1,5656 @@ +//===--- Target RTLs Implementation ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// RTL for SPIR-V/Xe machine
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <level_zero/ze_api.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#include <process.h>
+#else
+#include <dlfcn.h>
+#include <unistd.h>
+#endif // !_WIN32
+
+#include "elf_light.h"
+#include "omptargetplugin.h"
+#include "rtl-trace.h"
+#ifdef _WIN32
+#include "intel_win_dlfcn.h"
+#endif
+
+#include "llvm/Support/Endian.h"
+
+/// Host runtime routines being used
+extern "C" {
+#ifdef _WIN32
+int __cdecl omp_get_max_teams(void);
+int __cdecl omp_get_thread_limit(void);
+double __cdecl omp_get_wtime(void);
+int __cdecl omp_get_max_threads(void);
+int __cdecl __kmpc_global_thread_num(void *);
+#else
+int omp_get_max_teams(void) __attribute__((weak));
+int omp_get_thread_limit(void) __attribute__((weak));
+double omp_get_wtime(void) __attribute__((weak));
+int omp_get_max_threads(void) __attribute__((weak));
+int __kmpc_global_thread_num(void *) __attribute__((weak));
+#endif
+} // extern "C"
+
+/// Default alignment for allocation
+#define LEVEL0_ALIGNMENT 0
+/// Default staging buffer size for host to device copy (16KB)
+#define LEVEL0_STAGING_BUFFER_SIZE (1 << 14)
+/// Default staging buffer count
+#define LEVEL0_STAGING_BUFFER_COUNT 64
+
+#ifndef EXTRACT_BITS
+// MSB=63, LSB=0
+#define EXTRACT_BITS(I64, HIGH, LOW)                                           \
+  (((uint64_t)I64) >> (LOW)) & (((uint64_t)1 << ((HIGH) - (LOW) + 1)) - 1)
+#endif
+
+// Subdevice utilities
+// Device encoding (MSB=63, LSB=0)
+// 63..63: Has subdevice
+// 62..58: Reserved
+// 57..56: Subdevice level
+// 55..48: Subdevice ID start
+// 47..40: Subdevice ID count
+// 39..32: Subdevice ID stride
+// 31..00: Device ID
+#define SUBDEVICE_GET_LEVEL(ID) ((uint32_t)EXTRACT_BITS(ID, 57, 56))
+#define SUBDEVICE_GET_START(ID) ((uint32_t)EXTRACT_BITS(ID, 55, 48))
+#define SUBDEVICE_GET_COUNT(ID) ((uint32_t)EXTRACT_BITS(ID, 47, 40))
+#define SUBDEVICE_GET_STRIDE(ID) ((uint32_t)EXTRACT_BITS(ID, 39, 32))
+#define SUBDEVICE_GET_ROOT(ID) ((uint32_t)EXTRACT_BITS(ID, 31, 0))
+
+// Subdevice options
+#ifndef SUBDEVICE_USE_ROOT_MEMORY
+#define SUBDEVICE_USE_ROOT_MEMORY 0
+#endif
+
+#define ALLOC_KIND_TO_STR(Kind)                                                \
+  (Kind == TARGET_ALLOC_HOST ? "host memory"                                   \
+   : (Kind == TARGET_ALLOC_SHARED ? "shared memory"                            \
+      : (Kind == TARGET_ALLOC_DEVICE ?                                         \
"device memory" : "unknown memory"))) + +/// Device type enumeration common to compiler and runtime +enum DeviceArch : uint64_t { + DeviceArch_None = 0, + DeviceArch_Gen9 = 0x0001, + DeviceArch_XeLP = 0x0002, + DeviceArch_XeHP = 0x0004, + DeviceArch_XeHPG = 0x0008, + DeviceArch_x86_64 = 0x0100 +}; + +/// Mapping from device arch to GPU runtime's device identifiers +std::map> DeviceArchMap { + { + DeviceArch_Gen9, { + 0x1900, // SKL + 0x5900, // KBL + 0x3E00, 0x9B00, // CFL + 0x8A00, // ICX + } + }, + { + DeviceArch_XeLP, { + 0xFF20, 0x9A00, // TGL + 0x4900, // DG1 + 0x4C00, // RKL + 0x4600, // ADLS + } + }, +}; + +/// Tentative enumerators used with ompx_get_device_info() and the data type +/// ompx_devinfo_name, char[N], +/// ompx_devinfo_pci_id, uint32_t +/// ompx_devinfo_tile_id, int32_t +/// ompx_devinfo_ccs_id, int32_t +/// ompx_devinfo_num_eus, uint32_t +/// ompx_devinfo_num_threads_per_eu, uint32_t +/// ompx_devinfo_eu_simd_width, uint32_t +/// ompx_devinfo_num_eus_per_subslice, uint32_t +/// ompx_devinfo_num_subslice_per_slice, uint32_t +/// ompx_devinfo_num_slices, uint32_t +/// ompx_devinfo_local_mem_size, size_t +/// ompx_devinfo_global_mem_size, size_t +/// ompx_devinfo_global_mem_cache_size, size_t +/// ompx_devinfo_max_clock_frequency, uint32_t +/// We always need same definition in omp.h. +enum { + ompx_devinfo_name = 0, + ompx_devinfo_pci_id, + ompx_devinfo_tile_id, + ompx_devinfo_ccs_id, + ompx_devinfo_num_eus, + ompx_devinfo_num_threads_per_eu, + ompx_devinfo_eu_simd_width, + ompx_devinfo_num_eus_per_subslice, + ompx_devinfo_num_subslices_per_slice, + ompx_devinfo_num_slices, + ompx_devinfo_local_mem_size, + ompx_devinfo_global_mem_size, + ompx_devinfo_global_mem_cache_size, + ompx_devinfo_max_clock_frequency +}; + +/// Staging buffer +/// A single staging buffer is not enough when batching is enabled since there +/// can be multiple pending copy operations. +class StagingBufferTy { + ze_context_handle_t Context = nullptr; + size_t Size = LEVEL0_STAGING_BUFFER_SIZE; + size_t Count = LEVEL0_STAGING_BUFFER_COUNT; + std::vector Buffers; + size_t Offset = 0; + + void *addBuffers() { + ze_host_mem_alloc_desc_t AllocDesc = { + ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, nullptr, 0 + }; + void *Ret = nullptr; + CALL_ZE_RET_NULL(zeMemAllocHost, Context, &AllocDesc, Size * Count, + LEVEL0_ALIGNMENT, &Ret); + Buffers.push_back(Ret); + return Ret; + } + +public: + ~StagingBufferTy() { + ze_result_t Rc; + (void)Rc; // GCC build compiler thinks Rc is unused for some reason. 
+ for (auto Ptr : Buffers) + CALL_ZE(Rc, zeMemFree, Context, Ptr); + } + + bool initialized() { return Context != nullptr; } + + void init(ze_context_handle_t _Context, size_t _Size, size_t _Count) { + Context = _Context; + Size = _Size; + Count = _Count; + } + + void reset() { Offset = 0; } + + /// Always return the first buffer + void *get() { + if (Size == 0 || Count == 0) + return nullptr; + if (Buffers.empty()) + return addBuffers(); + else + return Buffers[0]; + } + + /// Return the next available buffer + void *getNext() { + void *Ret = nullptr; + if (Size == 0 || Count == 0) + return Ret; + if (Buffers.empty() || Offset >= Buffers.size() * Size * Count) { + Ret = addBuffers(); + if (!Ret) + return Ret; + } else { + Ret = (void *)((uintptr_t)Buffers.back() + (Offset % (Size * Count))); + } + Offset += Size; + return Ret; + } +}; + +struct KernelBatchTy { + uint32_t MaxCommands = 0; + uint32_t NumCommands = 0; + ze_command_list_handle_t CmdList = nullptr; + ze_command_queue_handle_t CmdQueue = nullptr; + ze_event_pool_handle_t EventPool = nullptr; + ze_event_handle_t Event = nullptr; + bool UseImmCmdList = false; + + ~KernelBatchTy() { + if (CmdList) + CALL_ZE_RET_VOID(zeCommandListDestroy, CmdList); + if (CmdQueue) + CALL_ZE_RET_VOID(zeCommandQueueDestroy, CmdQueue); + if (EventPool) { + CALL_ZE_RET_VOID(zeEventDestroy, Event); + CALL_ZE_RET_VOID(zeEventPoolDestroy, EventPool); + } + } + + int32_t enqueueKernel(const ze_kernel_handle_t Kernel, + const ze_group_count_t &GroupCounts) { + // Already locked + CALL_ZE_RET_FAIL(zeCommandListAppendLaunchKernel, CmdList, Kernel, + &GroupCounts, nullptr, 0, nullptr); + NumCommands++; + + if (UseImmCmdList && NumCommands >= MaxCommands) { + CALL_ZE_RET_FAIL(zeCommandListAppendBarrier, CmdList, Event, 0, nullptr); + } + + return OFFLOAD_SUCCESS; + } + + int32_t run(std::mutex &DeviceMtx) { + std::lock_guard Lock(DeviceMtx); + if (NumCommands >= MaxCommands) { + if (UseImmCmdList) { + // Use barrier + event + CALL_ZE_RET_FAIL(zeEventHostSynchronize, Event, UINT64_MAX); + CALL_ZE_RET_FAIL(zeEventHostReset, Event); + } else { + CALL_ZE_RET_FAIL(zeCommandListClose, CmdList); + CALL_ZE_RET_FAIL(zeCommandQueueExecuteCommandLists, CmdQueue, 1, + &CmdList, nullptr); + DP("Submitted %" PRIu32 " kernels to command queue " DPxMOD "\n", + NumCommands, DPxPTR(CmdQueue)); + CALL_ZE_RET_FAIL(zeCommandQueueSynchronize, CmdQueue, UINT64_MAX); + CALL_ZE_RET_FAIL(zeCommandListReset, CmdList); + } + NumCommands = 0; + } + return OFFLOAD_SUCCESS; + } +}; + +/// Data type to track statistics (total, min, max, average). +template +class StatTy { + uint64_t Count = 0; + T Total = 0; + T Min = 0; + T Max = 0; +public: + StatTy& operator+=(const T Num) { + Total += Num; + Min = (Count == 0) ? Num : (std::min)(Min, Num); + Max = (std::max)(Max, Num); + Count++; + return *this; + } + uint64_t count() const { return Count; } + T getMin() const { return Min; } + T getMax() const { return Max; } + T getTot() const { return Total; } + T getAvg() const { return Count > 0 ? (Total / Count) : 0; } +}; + +/// RTL profile -- only host timer for now +class RTLProfileTy { + struct TimeTy { + StatTy HostTime; + StatTy DeviceTime; + }; + int ThreadId; + std::string DeviceIdStr; + std::string DeviceName; + std::map Data; + // L0 RT will keep UseCyclesPerSecondTimer=1 to enable new timer resolution + // during transition period (until 20210504). 
+ uint64_t TimestampNsec = 0; // For version < ZE_API_VERSION_1_1 + uint64_t TimestampCyclePerSec = 0; // For version >= ZE_API_VERSION_1_1 + uint64_t TimestampMax = 0; +public: + static const int64_t MSEC_PER_SEC = 1000; + static const int64_t USEC_PER_SEC = 1000000; + static const int64_t NSEC_PER_SEC = 1000000000; + static int64_t Multiplier; + + RTLProfileTy(const ze_device_properties_t &DeviceProperties, + const std::string &DeviceId, bool UseCyclePerSec) { + ThreadId = __kmpc_global_thread_num(nullptr); + DeviceIdStr = DeviceId; + DeviceName = DeviceProperties.name; + + // TODO: this is an extra check to be on safe side for all driver versions. + // Remove this heuristic when it is not necessary any more. + if (DeviceProperties.timerResolution < 1000) + UseCyclePerSec = false; + + if (UseCyclePerSec) + TimestampCyclePerSec = DeviceProperties.timerResolution; + else + TimestampNsec = DeviceProperties.timerResolution; + auto validBits = DeviceProperties.kernelTimestampValidBits; + if (validBits > 0 && validBits < 64) + TimestampMax = ~(-1ULL << validBits); + else + WARNING("Invalid kernel timestamp bit width (%" PRIu32 "). " + "Long-running kernels may report incorrect device time.\n", + validBits); + } + + ~RTLProfileTy() { printData(); } + + void printData() { + const std::string KernelPrefix("Kernel "); + + auto IsKernel = [&KernelPrefix](const std::string &Key) { + return Key.substr(0, KernelPrefix.size()) == KernelPrefix; + }; + + auto AlignLeft = [](size_t Width, const std::string &Str) { + if (Str.size() < Width) + return Str + std::string(Width - Str.size(), ' '); + return Str; + }; + + // Print number with limited string count + auto PrintNum = [](double Num) { + if (Num > 1e6) + fprintf(stderr, "%10.2e", Num); + else + fprintf(stderr, "%10.2f", Num); + }; + + size_t MaxKeyLength = 0; + for (const auto &D : Data) + if (!IsKernel(D.first) && MaxKeyLength < D.first.size()) + MaxKeyLength = D.first.size(); + + std::string BoldLine(MaxKeyLength + 92, '='); + std::string Line(MaxKeyLength + 92, '-'); + + fprintf(stderr, "%s\n", BoldLine.c_str()); + + fprintf(stderr, "LIBOMPTARGET_PLUGIN_PROFILE(%s) for OMP DEVICE(%s) %s" + ", Thread %" PRId32 "\n", GETNAME(TARGET_NAME), DeviceIdStr.c_str(), + DeviceName.c_str(), ThreadId); + + fprintf(stderr, "%s\n", Line.c_str()); + + // Print kernel ID and name + int KernelID = 0; + for (const auto &D : Data) { + if (!IsKernel(D.first)) + continue; + std::string KernelIDStr = KernelPrefix + std::to_string(KernelID++); + fprintf(stderr, "%s: %s\n", AlignLeft(MaxKeyLength, KernelIDStr).c_str(), + D.first.substr(KernelPrefix.size()).c_str()); + } + + fprintf(stderr, "%s\n", Line.c_str()); + + // Print column headers + bool IsMsec = (Multiplier == MSEC_PER_SEC); + const char *HostTime = IsMsec ? "Host Time (msec)" : "Host Time (usec)"; + const char *DeviceTime = + IsMsec ? 
"Device Time (msec)" : "Device Time (usec)"; + fprintf(stderr, "%s: %40s%40s\n", AlignLeft(MaxKeyLength, "").c_str(), + AlignLeft(40, HostTime).c_str(), AlignLeft(40, DeviceTime).c_str()); + fprintf(stderr, "%s: %10s%10s%10s%10s%10s%10s%10s%10s%10s\n", + AlignLeft(MaxKeyLength, "Name").c_str(), + "Total", "Average", "Min", "Max", + "Total", "Average", "Min", "Max", "Count"); + fprintf(stderr, "%s\n", Line.c_str()); + + // Print numbers + KernelID = 0; + for (const auto &D : Data) { + std::string Key(D.first); + double HTFactor = Multiplier; + double DTFactor = 0; + if (IsKernel(Key) || + Key.substr(0, 8) == "DataRead" || Key.substr(0, 9) == "DataWrite") { + DTFactor = Multiplier; + if (IsKernel(Key)) + Key = KernelPrefix + std::to_string(KernelID++); + } + auto &HT = D.second.HostTime; + auto &DT = D.second.DeviceTime; + fprintf(stderr, "%s: ", AlignLeft(MaxKeyLength, Key).c_str()); + PrintNum(HT.getTot() * HTFactor); + PrintNum(HT.getAvg() * HTFactor); + PrintNum(HT.getMin() * HTFactor); + PrintNum(HT.getMax() * HTFactor); + PrintNum(DT.getTot() * DTFactor); + PrintNum(DT.getAvg() * DTFactor); + PrintNum(DT.getMin() * DTFactor); + PrintNum(DT.getMax() * DTFactor); + PrintNum((double)HT.count()); + fprintf(stderr, "\n"); + } + + fprintf(stderr, "%s\n", BoldLine.c_str()); + } + + void update(const char *Name, double HostTime) { + std::string Key(Name); + update(Key, HostTime); + } + + void update(const char *Name, double HostTime, double DeviceTime) { + std::string Key(Name); + update(Key, HostTime, DeviceTime); + } + + void update(std::string &Name, double HostTime) { + auto &Time = Data[Name]; + Time.HostTime += HostTime; + } + + void update(std::string &Name, double HostTime, double DeviceTime) { + auto &Time = Data[Name]; + Time.HostTime += HostTime; + Time.DeviceTime += DeviceTime; + } + + /// Return elapsed time from the given profile event + double getEventTime(ze_event_handle_t Event) { + ze_kernel_timestamp_result_t TS; + CALL_ZE_EXIT_FAIL(zeEventQueryKernelTimestamp, Event, &TS); + double WallTime = 0; + + if (TS.global.kernelEnd >= TS.global.kernelStart) + WallTime = TS.global.kernelEnd - TS.global.kernelStart; + else if (TimestampMax > 0) + WallTime = TimestampMax - TS.global.kernelStart + TS.global.kernelEnd + 1; + else + WARNING("Timestamp overflow cannot be handled for this device.\n"); + + if (TimestampNsec > 0) + WallTime *= (double)TimestampNsec / NSEC_PER_SEC; + else + WallTime /= (double)TimestampCyclePerSec; + + return WallTime; + } + + void update(std::string &Name, ze_event_handle_t Event) { + Data[Name].DeviceTime += getEventTime(Event); + } +}; +int64_t RTLProfileTy::Multiplier; + +/// All thread-local data used by RTL +class TLSTy { + /// Command list for each device + std::map CmdLists; + + /// Main copy command list for each device + std::map CopyCmdLists; + + /// Link copy command list for each device + std::map LinkCopyCmdLists; + + /// Command queue for each device + std::map CmdQueues; + + /// CCS Command queue for each device + std::map CCSCmdQueues; + + /// Main copy command queue for each device + std::map CopyCmdQueues; + + /// Link copy command queues for each device + std::map LinkCopyCmdQueues; + + /// Run profile for each device + std::map Profiles; + + /// Staging buffer + StagingBufferTy StagingBuffer; + + /// Subdevice encoding + int64_t SubDeviceCode = 0; + +public: + ~TLSTy() { + for (auto CmdList : CmdLists) + CALL_ZE_EXIT_FAIL(zeCommandListDestroy, CmdList.second); + for (auto CmdList : CopyCmdLists) + CALL_ZE_EXIT_FAIL(zeCommandListDestroy, 
CmdList.second); + for (auto CmdList : LinkCopyCmdLists) + CALL_ZE_EXIT_FAIL(zeCommandListDestroy, CmdList.second); + for (auto CmdQueue : CmdQueues) + CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CmdQueue.second); + for (auto CmdQueue : CCSCmdQueues) + CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CmdQueue.second); + for (auto CmdQueue : CopyCmdQueues) + CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CmdQueue.second); + for (auto CmdQueue : LinkCopyCmdQueues) + CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CmdQueue.second); + for (auto Profile : Profiles) + delete Profile.second; + } + + ze_command_list_handle_t getCmdList(int32_t ID) { + return (CmdLists.count(ID) > 0) ? CmdLists.at(ID) : nullptr; + } + + ze_command_list_handle_t getCopyCmdList(int32_t ID) { + return (CopyCmdLists.count(ID) > 0) ? CopyCmdLists.at(ID) : nullptr; + } + + ze_command_list_handle_t getLinkCopyCmdList(int32_t ID) { + return (LinkCopyCmdLists.count(ID) > 0) ? LinkCopyCmdLists.at(ID): nullptr; + } + + ze_command_queue_handle_t getCmdQueue(int32_t ID) { + return (CmdQueues.count(ID) > 0) ? CmdQueues.at(ID) : nullptr; + } + + ze_command_queue_handle_t getCCSCmdQueue(int32_t ID) { + return (CCSCmdQueues.count(ID) > 0) ? CCSCmdQueues.at(ID) : nullptr; + } + + ze_command_queue_handle_t getCopyCmdQueue(int32_t ID) { + return (CopyCmdQueues.count(ID) > 0) ? CopyCmdQueues.at(ID) : nullptr; + } + + ze_command_queue_handle_t getLinkCopyCmdQueue(int32_t ID) { + return + (LinkCopyCmdQueues.count(ID) > 0) ? LinkCopyCmdQueues.at(ID) : nullptr; + } + + RTLProfileTy *getProfile(int32_t ID) { + return (Profiles.count(ID) > 0) ? Profiles.at(ID) : nullptr; + } + + int64_t getSubDeviceCode() { return SubDeviceCode; } + + StagingBufferTy &getStagingBuffer() { return StagingBuffer; } + + void setCmdList(int32_t ID, ze_command_list_handle_t CmdList) { + CmdLists[ID] = CmdList; + } + + void setCopyCmdList(int32_t ID, ze_command_list_handle_t CmdList) { + CopyCmdLists[ID] = CmdList; + } + + void setLinkCopyCmdList(int32_t ID, ze_command_list_handle_t CmdList) { + LinkCopyCmdLists[ID] = CmdList; + } + + void setCmdQueue(int32_t ID, ze_command_queue_handle_t CmdQueue) { + CmdQueues[ID] = CmdQueue; + } + + void setCCSCmdQueue(int32_t ID, ze_command_queue_handle_t CmdQueue) { + CCSCmdQueues[ID] = CmdQueue; + } + + void setCopyCmdQueue(int32_t ID, ze_command_queue_handle_t CmdQueue) { + CopyCmdQueues[ID] = CmdQueue; + } + + void setLinkCopyCmdQueue(int32_t ID, ze_command_queue_handle_t CmdQueue) { + LinkCopyCmdQueues[ID] = CmdQueue; + } + + void setProfile(int32_t ID, RTLProfileTy *Profile) { + Profiles[ID] = Profile; + } + + void setSubDeviceCode(int64_t Code) { SubDeviceCode = Code; } +}; + +/// Global list for clean-up +std::list *TLSList = nullptr; + +/// Returns thread-local storage while adding a new instance to the global list. 
+static TLSTy *getTLS() { + static thread_local TLSTy *TLS = nullptr; + static std::mutex Mtx; + if (TLS) + return TLS; + TLS = new TLSTy(); + std::lock_guard Lock(Mtx); + TLSList->push_back(TLS); + return TLS; +} + +int DebugLevel = getDebugLevel(); + +class KernelInfoTy { + uint32_t Version = 0; + uint64_t Attributes1 = 0; + uint64_t WGNum = 0; + uint64_t WINum = 0; + + struct KernelArgInfoTy { + bool IsLiteral = false; + uint32_t Size = 0; + KernelArgInfoTy(bool IsLiteral, uint32_t Size) + : IsLiteral(IsLiteral), Size(Size) {} + }; + std::vector ArgsInfo; + + void checkVersion(uint32_t MinVer) const { + assert(Version >= MinVer && + "API is not supported for this version of KernelInfoTy."); + (void)Version; + } + +public: + KernelInfoTy(uint32_t Version) : Version(Version) {} + void addArgInfo(bool IsLiteral, uint32_t Size) { + checkVersion(1); + ArgsInfo.emplace_back(IsLiteral, Size); + } + size_t getArgsNum() const { + checkVersion(1); + return ArgsInfo.size(); + } + bool isArgLiteral(uint32_t Idx) const { + checkVersion(1); + return ArgsInfo[Idx].IsLiteral; + } + uint32_t getArgSize(uint32_t Idx) const { + checkVersion(1); + return ArgsInfo[Idx].Size; + } + void setAttributes1(uint64_t Val) { + Attributes1 = Val; + } + bool getHasTeamsReduction() const { + return (Attributes1 & 1); + } + void setWGNum(uint64_t Val) { + WGNum = Val; + } + uint64_t getWGNum() const { + return WGNum; + } + void setWINum(uint64_t Val) { + WINum = Val; + } + uint64_t getWINum() const { + return WINum; + } + bool isAtomicFreeReduction() const { + return getWGNum(); + } +}; + +/// Level Zero program that can contain multiple modules. +class LevelZeroProgramTy { + struct DeviceOffloadEntryTy { + /// Common part with the host offload table. + __tgt_offload_entry Base; + /// Length of the Base.name string in bytes including + /// the null terminator. + size_t NameSize; + }; + + /// Cached device image + __tgt_device_image *Image = nullptr; + + /// Cached Level Zero context + ze_context_handle_t Context = nullptr; + + /// Cached Level Zero device + ze_device_handle_t Device = nullptr; + + /// Cached OpenMP device ID + int32_t DeviceId = 0; + + /// Target table + __tgt_target_table Table; + + /// Target entries + std::vector<__tgt_offload_entry> Entries; + + /// Internal offload entries + std::vector OffloadEntries; + + /// Handle multiple modules within a single target image + std::vector Modules; + + /// Kernels created from the target image + std::vector Kernels; + + /// Kernel info added by compiler + std::unordered_map KernelInfo; + + /// Module that contains global data including device RTL + ze_module_handle_t GlobalModule = nullptr; + + /// Requires module link + bool RequiresModuleLink = false; + + /// Is this module library + bool IsLibModule = false; + + /// Loads the device version of the offload table for device \p DeviceId. + /// The table is expected to have \p NumEntries entries. + /// Returns true, if the load was successful, false - otherwise. + bool loadOffloadTable(size_t NumEntries); + + /// Build a single module with the given image, build option, and format. + int32_t addModule(const size_t Size, const uint8_t *Image, + const std::string &BuildOption, ze_module_format_t Format); + + /// Looks up an OpenMP declare target global variable with the given + /// \p Name and \p Size in the device environment for the current device. + /// The lookup is first done via the device offload table. If it fails, + /// then the lookup falls back to non-OpenMP specific lookup on the device. 
+ void *getOffloadVarDeviceAddr(const char *Name, size_t Size); + + /// Read KernelInfo auxiliary information for the specified kernel. + /// The information is stored in \p KernelInfo. + /// The function is called during the binary loading. + bool readKernelInfo(const __tgt_offload_entry &KernelEntry); + +public: + LevelZeroProgramTy() = default; + + LevelZeroProgramTy(__tgt_device_image *Image_, ze_context_handle_t Context_, + ze_device_handle_t Device_, int32_t DeviceId_) : + Image(Image_), Context(Context_), Device(Device_), DeviceId(DeviceId_) {} + + ~LevelZeroProgramTy(); + + /// Build modules from the target image description + int32_t buildModules(std::string &BuildOptions); + + /// Link modules stored in \p Modules. + int32_t linkModules(); + + /// Looks up an external global variable with the given \p Name + /// in the device environment for device \p DeviceId. + /// \p Size must not be null. If (*SizePtr) is not zero, then + /// the lookup verifies that the found variable's size matches + /// (*SizePtr), otherwise, the found variable's size is returned + /// via \p Size. + void *getVarDeviceAddr(const char *Name, size_t *SizePtr); + + /// Looks up an external global variable with the given \p Name + /// and \p Size in the device environment for device \p DeviceId. + void *getVarDeviceAddr(const char *Name, size_t Size); + + /// Build kernels from all modules. + int32_t buildKernels(); + + /// Return the pointer to the offload table. + __tgt_target_table *getTablePtr() { return &Table; } + + /// Returns the auxiliary kernel information for the specified kernel. + const KernelInfoTy *getKernelInfo(ze_kernel_handle_t Kernel) const; +}; + +/// Get default compute group ordinal. Returns Ordinal-NumQueues pair +static std::pair +getComputeOrdinal(ze_device_handle_t Device) { + std::pair Ordinal{UINT32_MAX, 0}; + uint32_t Count = 0; + CALL_ZE_RET(Ordinal, zeDeviceGetCommandQueueGroupProperties, Device, &Count, + nullptr); + ze_command_queue_group_properties_t Init + {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES, nullptr}; + std::vector Properties(Count, Init); + CALL_ZE_RET(Ordinal, zeDeviceGetCommandQueueGroupProperties, Device, &Count, + Properties.data()); + for (uint32_t I = 0; I < Count; I++) { + if (Properties[I].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + Ordinal.first = I; + Ordinal.second = Properties[I].numQueues; + break; + } + } + if (Ordinal.first == UINT32_MAX) + DP("Error: no command queues are found\n"); + + return Ordinal; +} + +/// Get copy command queue group ordinal. 
Returns Ordinal-NumQueues pair +static std::pair +getCopyOrdinal(ze_device_handle_t Device, bool LinkCopy = false) { + std::pair Ordinal{UINT32_MAX, 0}; + uint32_t Count = 0; + CALL_ZE_RET(Ordinal, zeDeviceGetCommandQueueGroupProperties, Device, &Count, + nullptr); + ze_command_queue_group_properties_t Init + {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES, nullptr}; + std::vector Properties(Count, Init); + CALL_ZE_RET(Ordinal, zeDeviceGetCommandQueueGroupProperties, Device, &Count, + Properties.data()); + + for (uint32_t I = 0; I < Count; I++) { + auto &Flags = Properties[I].flags; + if ((Flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + (Flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) { + auto NumQueues = Properties[I].numQueues; + if (LinkCopy && NumQueues > 1) { + Ordinal = {I, NumQueues}; + DP("Found link copy command queue for device " DPxMOD ", ordinal = %" + PRIu32 ", number of queues = %" PRIu32 "\n", DPxPTR(Device), + Ordinal.first, Ordinal.second); + break; + } else if (!LinkCopy && NumQueues == 1) { + Ordinal = {I, NumQueues}; + DP("Found copy command queue for device " DPxMOD ", ordinal = %" PRIu32 + "\n", DPxPTR(Device), Ordinal.first); + break; + } + } + } + return Ordinal; +} + +/// Create a command list with given ordinal and flags +static ze_command_list_handle_t createCmdList( + ze_context_handle_t Context, + ze_device_handle_t Device, + uint32_t Ordinal, + ze_command_list_flags_t Flags, + std::string &DeviceIdStr) { + ze_command_list_desc_t cmdListDesc = { + ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, + nullptr, // extension + Ordinal, + Flags + }; + ze_command_list_handle_t cmdList; + CALL_ZE_RET_NULL(zeCommandListCreate, Context, Device, &cmdListDesc, + &cmdList); + DP("Created a command list " DPxMOD " (Ordinal: %" PRIu32 + ") for device %s.\n", DPxPTR(cmdList), Ordinal, DeviceIdStr.c_str()); + return cmdList; +} + +/// Create a command list with default flags +static ze_command_list_handle_t createCmdList( + ze_context_handle_t Context, ze_device_handle_t Device, uint32_t Ordinal, + std::string &DeviceIdStr) { + return (Ordinal == UINT32_MAX) + ? nullptr + : createCmdList(Context, Device, Ordinal, 0, DeviceIdStr); +} + +/// Create a command queue with given ordinal and flags +static ze_command_queue_handle_t createCmdQueue( + ze_context_handle_t Context, + ze_device_handle_t Device, + uint32_t Ordinal, + uint32_t Index, + ze_command_queue_flags_t Flags, + std::string &DeviceIdStr) { + ze_command_queue_desc_t cmdQueueDesc = { + ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + nullptr, // extension + Ordinal, + Index, + Flags, // flags + ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL + }; + ze_command_queue_handle_t cmdQueue; + CALL_ZE_RET_NULL(zeCommandQueueCreate, Context, Device, &cmdQueueDesc, + &cmdQueue); + DP("Created a command queue " DPxMOD " (Ordinal: %" PRIu32 ", Index: %" PRIu32 + ") for device %s.\n", DPxPTR(cmdQueue), Ordinal, Index, + DeviceIdStr.c_str()); + return cmdQueue; +} + +/// Create a command queue with default flags +static ze_command_queue_handle_t createCmdQueue( + ze_context_handle_t Context, ze_device_handle_t Device, + uint32_t Ordinal, uint32_t Index, std::string &DeviceIdStr) { + return (Ordinal == UINT32_MAX) + ? 
+
+/// Create a context
+static ze_context_handle_t createContext(ze_driver_handle_t Driver) {
+  ze_context_desc_t contextDesc = {
+      ZE_STRUCTURE_TYPE_CONTEXT_DESC,
+      nullptr, // extension
+      0        // flags
+  };
+  ze_context_handle_t context;
+  CALL_ZE_RET_NULL(zeContextCreate, Driver, &contextDesc, &context);
+  return context;
+}
+
+/// RTL flags
+struct RTLFlagsTy {
+  uint64_t DumpTargetImage : 1;
+  uint64_t EnableProfile : 1;
+  uint64_t EnableTargetGlobals : 1;
+  uint64_t LinkLibDevice : 1;
+  uint64_t UseHostMemForUSM : 1;
+  uint64_t UseMemoryPool : 1;
+  uint64_t UseDriverGroupSizes : 1;
+  uint64_t UseImageOptions : 1;
+  uint64_t UseMultipleComputeQueues : 1;
+  uint64_t ShowBuildLog : 1;
+  uint64_t UseImmCmdList : 1;
+  uint64_t Reserved : 53;
+  RTLFlagsTy()
+      : DumpTargetImage(0), EnableProfile(0), EnableTargetGlobals(0),
+        LinkLibDevice(0), // TODO: change it to 1 when L0 issue is resolved
+        UseHostMemForUSM(0), UseMemoryPool(1), UseDriverGroupSizes(0),
+        UseImageOptions(1), UseMultipleComputeQueues(0), ShowBuildLog(0),
+        UseImmCmdList(0), Reserved(0) {}
+};
+
+/// Kernel properties.
+struct KernelPropertiesTy {
+  const char *Name = nullptr;
+  uint32_t Width = 0;
+  uint32_t SIMDWidth = 0;
+  uint32_t MaxThreadGroupSize = 0;
+  ze_kernel_indirect_access_flags_t IndirectAccessFlags = 0;
+};
+
+/// Common event pool used in the plugin. This event pool assumes all events
+/// from the pool are host-visible and use the same event pool flags.
+class EventPoolTy {
+  /// Size of L0 event pool created on demand
+  size_t PoolSize = 64;
+
+  /// Context of the events
+  ze_context_handle_t Context = nullptr;
+
+  /// Additional event pool flags common to this pool
+  uint32_t Flags = 0;
+
+  /// Protects concurrent access to the pool
+  std::unique_ptr<std::mutex> Mtx;
+
+  /// List of created L0 event pools
+  std::list<ze_event_pool_handle_t> Pools;
+
+  /// List of free L0 events
+  std::list<ze_event_handle_t> Events;
+
+public:
+  /// Initialize context, flags, and mutex
+  void init(ze_context_handle_t _Context, uint32_t _Flags) {
+    Context = _Context;
+    Flags = _Flags;
+    Mtx.reset(new std::mutex);
+  }
+
+  /// Destroys L0 resources
+  void deinit() {
+    for (auto E : Events)
+      CALL_ZE_RET_VOID(zeEventDestroy, E);
+    for (auto P : Pools)
+      CALL_ZE_RET_VOID(zeEventPoolDestroy, P);
+  }
+
+  /// Get a free event from the pool
+  ze_event_handle_t getEvent() {
+    std::lock_guard<std::mutex> Lock(*Mtx);
+
+    if (Events.empty()) {
+      // Need to create a new L0 pool
+      ze_event_pool_desc_t Desc{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr};
+      Desc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | Flags;
+      Desc.count = PoolSize;
+      ze_event_pool_handle_t Pool;
+      CALL_ZE_RET_NULL(zeEventPoolCreate, Context, &Desc, 0, nullptr, &Pool);
+      Pools.push_back(Pool);
+
+      // Create events
+      ze_event_desc_t EventDesc{ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr};
+      EventDesc.signal = 0;
+      EventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
+      for (uint32_t I = 0; I < PoolSize; I++) {
+        EventDesc.index = I;
+        ze_event_handle_t Event;
+        CALL_ZE_RET_NULL(zeEventCreate, Pool, &EventDesc, &Event);
+        Events.push_back(Event);
+      }
+    }
+
+    auto Ret = Events.back();
+    Events.pop_back();
+
+    return Ret;
+  }
+
+  /// Return an event to the pool
+  void releaseEvent(ze_event_handle_t Event) {
+    std::lock_guard<std::mutex> Lock(*Mtx);
+
+    CALL_ZE_RET_VOID(zeEventHostReset, Event);
+    Events.push_back(Event);
+  }
+};
+
+typedef std::vector<std::vector<uint32_t>> SubDeviceIdsTy;
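+
+// Example (sketch): expected EventPoolTy life cycle; the code that owns the
+// pool is assumed, not shown:
+//   EventPoolTy Pool;
+//   Pool.init(Context, 0);                 // once per context
+//   ze_event_handle_t E = Pool.getEvent(); // grows by PoolSize when empty
+//   ...                                    // signal/wait on E
+//   Pool.releaseEvent(E);                  // resets and recycles the event
+//   Pool.deinit();                         // destroys events and L0 pools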
+
+/// Device modes for multi-tile devices
+enum DeviceMode {
+  DEVICE_MODE_TOP = 0, // Use only top-level devices with subdevice clause
+  DEVICE_MODE_SUB,     // Use only tiles
+  DEVICE_MODE_SUBSUB   // Use only c-slices
+};
+
+/// Specialization constants used for a module compilation.
+class SpecConstantsTy {
+  std::vector<uint32_t> ConstantIds;
+  std::vector<const void *> ConstantValues;
+
+public:
+  SpecConstantsTy() = default;
+  SpecConstantsTy(const SpecConstantsTy &) = delete;
+  SpecConstantsTy(SpecConstantsTy &&Other)
+      : ConstantIds(std::move(Other.ConstantIds)),
+        ConstantValues(std::move(Other.ConstantValues)) {}
+
+  ~SpecConstantsTy() {
+    for (auto I : ConstantValues) {
+      const char *ValuePtr = reinterpret_cast<const char *>(I);
+      delete[] ValuePtr;
+    }
+  }
+
+  template <typename T>
+  void addConstant(uint32_t Id, T Val) {
+    const size_t ValSize = sizeof(Val);
+    char *ValuePtr = new char[ValSize];
+    *reinterpret_cast<T *>(ValuePtr) = Val;
+
+    ConstantIds.push_back(Id);
+    ConstantValues.push_back(reinterpret_cast<const void *>(ValuePtr));
+  }
+
+  ze_module_constants_t getModuleConstants() const {
+    ze_module_constants_t Tmp{
+        static_cast<uint32_t>(ConstantValues.size()), ConstantIds.data(),
+        // Unfortunately we have to const_cast it.
+        // L0 data type should probably be fixed.
+        const_cast<const void **>(ConstantValues.data())};
+    return Tmp;
+  }
+};
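+
+// Example (sketch): passing spec constants to module creation; the actual
+// module-build code lives elsewhere in this file, so the snippet below is
+// illustrative only:
+//   SpecConstantsTy SC;
+//   SC.addConstant<uint32_t>(/*Id=*/1, 64u);
+//   ze_module_constants_t Constants = SC.getModuleConstants();
+//   ze_module_desc_t Desc{ZE_STRUCTURE_TYPE_MODULE_DESC, nullptr};
+//   Desc.pConstants = &Constants; // consumed by zeModuleCreate()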
+
+/// RTL options and flags users can override
+struct RTLOptionTy {
+  /// Binary flags
+  RTLFlagsTy Flags;
+
+  /// Emulated data transfer latency in microseconds
+  uint32_t DataTransferLatency = 0;
+
+  /// Device type
+  int32_t DeviceType = ZE_DEVICE_TYPE_GPU;
+
+  /// Global thread limit obtained from host runtime
+  uint32_t ThreadLimit = 0;
+
+  /// Global num teams obtained from host runtime
+  uint32_t NumTeams = 0;
+
+  /// Staging buffer size
+  size_t StagingBufferSize = LEVEL0_STAGING_BUFFER_SIZE;
+
+  /// Staging buffer count
+  size_t StagingBufferCount = LEVEL0_STAGING_BUFFER_COUNT;
+
+  /// Copy engine option
+  /// 0: disabled, 1: main, 2: link, 3: all (default)
+  int32_t UseCopyEngine = 3;
+
+  /// Memory pool parameters
+  /// MemPoolInfo[MemType] = {AllocMax(MB), Capacity, PoolSize(MB)}
+  std::map<int32_t, std::vector<size_t>> MemPoolInfo = {
+      {TARGET_ALLOC_DEVICE, {1, 4, 256}},
+      {TARGET_ALLOC_HOST, {1, 4, 256}},
+      {TARGET_ALLOC_SHARED, {8, 4, 256}}
+  };
+
+  /// User-directed allocation kind
+  int32_t TargetAllocKind = TARGET_ALLOC_DEFAULT;
+
+  /// Oversubscription rate for normal kernels
+  uint32_t SubscriptionRate = 4;
+
+  /// Oversubscription rate for reduction kernels
+  uint32_t ReductionSubscriptionRate = 16;
+  bool ReductionSubscriptionRateIsDefault = true;
+
+  /// Forced kernel width only for internal experiments
+  uint32_t ForcedKernelWidth = 0;
+
+  /// Loop kernels with a known ND-range may have few iterations and
+  /// may not exploit the offload device to the fullest extent.
+  /// Assume a device has N total HW threads available, and the kernel
+  /// requires M hardware threads with LWS set to L.
+  /// If (M < N * ThinThreadsThreshold), then we iteratively divide L by 2
+  /// to increase the number of HW threads used for executing the kernel.
+  /// Effectively, we end up with L less than the kernel's SIMD width, so
+  /// the HW threads will not use all their SIMD lanes. This (presumably)
+  /// allows more parallelism, because the stalls in the SIMD lanes are
+  /// distributed across more HW threads, and the probability of having
+  /// a stall (or a sequence of stalls) on a critical path in the kernel
+  /// decreases. This is just a heuristic that seems to work well for some
+  /// kernels (which poorly expose parallelism in the first place).
+  double ThinThreadsThreshold = 0.1;
+
+  /// Decides how subdevices are exposed as OpenMP devices
+  int32_t DeviceMode = DEVICE_MODE_TOP;
+
+  // Compilation options for IGC
+  // OpenCL 2.0 builtins (like atomic_load_explicit, etc.) are used by the
+  // runtime, so we have to explicitly specify the "-cl-std=CL2.0" compilation
+  // option. With it, the SPIR-V is converted to LLVM IR with OpenCL 2.0
+  // builtins. Otherwise, the SPIR-V is converted to LLVM IR with OpenCL 1.2
+  // builtins.
+  std::string CompilationOptions = "-cl-std=CL2.0 ";
+  std::string InternalCompilationOptions = "";
+  std::string UserCompilationOptions = "";
+
+  // Spec constants used for all modules.
+  SpecConstantsTy CommonSpecConstants;
+
+  /// Read environment variables
+  RTLOptionTy() {
+    const char *Env = nullptr;
+
+    // Data transfer latency
+    if ((Env = readEnvVar("LIBOMPTARGET_DATA_TRANSFER_LATENCY"))) {
+      std::string Value(Env);
+      if (Value.substr(0, 2) == "T,") {
+        int32_t Usec = std::stoi(Value.substr(2));
+        DataTransferLatency = (Usec > 0) ? Usec : 0;
+      }
+    }
+
+    // Target device type
+    if ((Env = readEnvVar("LIBOMPTARGET_DEVICETYPE"))) {
+      std::string Value(Env);
+      if (Value == "GPU" || Value == "gpu" || Value == "") {
+        DeviceType = ZE_DEVICE_TYPE_GPU;
+      } else if (Value == "CPU" || Value == "cpu") {
+        DeviceType = ZE_DEVICE_TYPE_CPU;
+        DP("Warning: CPU device is not supported\n");
+      } else {
+        DP("Warning: Invalid LIBOMPTARGET_DEVICETYPE=%s\n", Env);
+      }
+    }
+
+    // Global thread limit
+    int ThrLimit = omp_get_thread_limit();
+    DP("omp_get_thread_limit() returned %" PRId32 "\n", ThrLimit);
+    // omp_get_thread_limit() returns INT_MAX by default.
+    // NOTE: Windows.h defines a max() macro, so we have to guard
+    // the call with parentheses.
+    ThreadLimit = (ThrLimit > 0 &&
+                   ThrLimit != (std::numeric_limits<int>::max)()) ?
+                  ThrLimit : 0;
+
+    // Global max number of teams.
+    int NTeams = omp_get_max_teams();
+    DP("omp_get_max_teams() returned %" PRId32 "\n", NTeams);
+    // omp_get_max_teams() returns INT_MAX by default.
+    // NOTE: Windows.h defines a max() macro, so we have to guard
+    // the call with parentheses.
+    NumTeams = (NTeams > 0 &&
+                NTeams != (std::numeric_limits<int>::max)()) ?
+               NTeams : 0;
+
+    // Compilation options for IGC
+    if ((Env = readEnvVar("LIBOMPTARGET_LEVEL0_COMPILATION_OPTIONS")))
+      UserCompilationOptions += std::string(" ") + Env;
+
+    if (DeviceType == ZE_DEVICE_TYPE_GPU) {
+      // Intel Graphics compilers that do not support the options below
+      // silently ignore them. Other OpenCL compilers may fail.
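+      // For example (illustrative): setting
+      // LIBOMPTARGET_LEVEL0_TARGET_GLOBALS=0 disables
+      // -cl-take-global-address below, while leaving the variable unset
+      // keeps the option enabled (parseBool() is defined elsewhere in this
+      // plugin).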
+      Env = readEnvVar("LIBOMPTARGET_LEVEL0_TARGET_GLOBALS");
+      if (!Env || parseBool(Env) != 0) {
+        InternalCompilationOptions += " -cl-take-global-address ";
+        Flags.EnableTargetGlobals = 1;
+      }
+      Env = readEnvVar("LIBOMPTARGET_LEVEL0_MATCH_SINCOSPI");
+      if (!Env || parseBool(Env) != 0) {
+        InternalCompilationOptions += " -cl-match-sincospi ";
+      }
+      Env = readEnvVar("LIBOMPTARGET_LEVEL0_USE_DRIVER_GROUP_SIZES");
+      if (Env && parseBool(Env) == 1) {
+        Flags.UseDriverGroupSizes = 1;
+      }
+    }
+
+    // Device mode
+    if ((Env = readEnvVar("LIBOMPTARGET_DEVICES"))) {
+      std::string Value(Env);
+      if (Value == "DEVICE" || Value == "device") {
+        DP("Device mode is %s -- using top-level devices with subdevice "
+           "clause support\n", Value.c_str());
+        DeviceMode = DEVICE_MODE_TOP;
+      } else if (Value == "SUBDEVICE" || Value == "subdevice") {
+        DP("Device mode is %s -- using 1st-level sub-devices\n",
+           Value.c_str());
+        DeviceMode = DEVICE_MODE_SUB;
+      } else if (Value == "SUBSUBDEVICE" || Value == "subsubdevice") {
+        DP("Device mode is %s -- using 2nd-level sub-devices\n",
+           Value.c_str());
+        DeviceMode = DEVICE_MODE_SUBSUB;
+      } else {
+        DP("Unknown device mode %s\n", Value.c_str());
+      }
+    }
+
+    // Plugin Profile
+    if ((Env = readEnvVar("LIBOMPTARGET_PLUGIN_PROFILE"))) {
+      if ((Env[0] == 'T' || Env[0] == '1') &&
+          (Env[1] == ',' || Env[1] == '\0')) {
+        Flags.EnableProfile = 1;
+        RTLProfileTy::Multiplier = RTLProfileTy::MSEC_PER_SEC;
+        if (Env[1] == ',') {
+          std::string Unit(&Env[2]);
+          if (Unit == "usec" || Unit == "unit_usec")
+            RTLProfileTy::Multiplier = RTLProfileTy::USEC_PER_SEC;
+        }
+      }
+    }
+
+    // Managed memory allocator
+    if ((Env = readEnvVar("LIBOMPTARGET_USM_HOST_MEM"))) {
+      if (parseBool(Env) == 1)
+        Flags.UseHostMemForUSM = 1;
+    }
+
+    // Memory pool
+    // LIBOMPTARGET_LEVEL0_MEMORY_POOL=