diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -32,10 +32,10 @@ endforeach() set(BOLT_ENABLE_RUNTIME_default OFF) -if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" +if ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" + OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND (CMAKE_SYSTEM_NAME STREQUAL "Linux" - OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") - AND "X86" IN_LIST BOLT_TARGETS_TO_BUILD) + OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")) set(BOLT_ENABLE_RUNTIME_default ON) endif() option(BOLT_ENABLE_RUNTIME "Enable BOLT runtime" ${BOLT_ENABLE_RUNTIME_default}) diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -27,8 +27,11 @@ -fno-exceptions -fno-rtti -fno-stack-protector - -mno-sse - -fPIC) + -fPIC + -mgeneral-regs-only) +if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS} "-mno-sse") +endif() # Don't let the compiler think it can create calls to standard libs target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS}) @@ -39,7 +42,7 @@ install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}") install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}") -if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*") +if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin") add_library(bolt_rt_instr_osx STATIC instr.cpp ${CMAKE_CURRENT_BINARY_DIR}/config.h diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -6,10 +6,6 @@ // //===----------------------------------------------------------------------===// -#if !defined(__x86_64__) -#error "For x86_64 only" -#endif - #if defined(__linux__) #include @@ -44,44 +40,6 @@ #error "For Linux or MacOS only" #endif -// Save all registers while keeping 16B stack alignment -#define SAVE_ALL \ - "push %%rax\n" \ - "push %%rbx\n" \ - "push %%rcx\n" \ - "push %%rdx\n" \ - "push %%rdi\n" \ - "push %%rsi\n" \ - "push %%rbp\n" \ - "push %%r8\n" \ - "push %%r9\n" \ - "push %%r10\n" \ - "push %%r11\n" \ - "push %%r12\n" \ - "push %%r13\n" \ - "push %%r14\n" \ - "push %%r15\n" \ - "sub $8, %%rsp\n" - -// Mirrors SAVE_ALL -#define RESTORE_ALL \ - "add $8, %%rsp\n" \ - "pop %%r15\n" \ - "pop %%r14\n" \ - "pop %%r13\n" \ - "pop %%r12\n" \ - "pop %%r11\n" \ - "pop %%r10\n" \ - "pop %%r9\n" \ - "pop %%r8\n" \ - "pop %%rbp\n" \ - "pop %%rsi\n" \ - "pop %%rdi\n" \ - "pop %%rdx\n" \ - "pop %%rcx\n" \ - "pop %%rbx\n" \ - "pop %%rax\n" - #define PROT_READ 0x1 /* Page can be read. */ #define PROT_WRITE 0x2 /* Page can be written. */ #define PROT_EXEC 0x4 /* Page can be executed. */ @@ -165,141 +123,41 @@ // Anonymous namespace covering everything but our library entry point namespace { -// Get the difference between runtime addrress of .text section and -// static address in section header table. Can be extracted from arbitrary -// pc value recorded at runtime to get the corresponding static address, which -// in turn can be used to search for indirect call description. Needed because -// indirect call descriptions are read-only non-relocatable data. -uint64_t getTextBaseAddress() { - uint64_t DynAddr; - uint64_t StaticAddr; - __asm__ volatile("leaq __hot_end(%%rip), %0\n\t" - "movabsq $__hot_end, %1\n\t" - : "=r"(DynAddr), "=r"(StaticAddr)); - return DynAddr - StaticAddr; -} - -constexpr uint32_t BufSize = 10240; - -#define _STRINGIFY(x) #x -#define STRINGIFY(x) _STRINGIFY(x) - -uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { - uint64_t ret; -#if defined(__APPLE__) -#define READ_SYSCALL 0x2000003 -#else -#define READ_SYSCALL 0 -#endif - __asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd), "S"(buf), "d"(count) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __write(uint64_t fd, const void *buf, uint64_t count) { - uint64_t ret; -#if defined(__APPLE__) -#define WRITE_SYSCALL 0x2000004 -#else -#define WRITE_SYSCALL 1 -#endif - __asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd), "S"(buf), "d"(count) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags, - uint64_t fd, uint64_t offset) { -#if defined(__APPLE__) -#define MMAP_SYSCALL 0x20000c5 -#else -#define MMAP_SYSCALL 9 -#endif - void *ret; - register uint64_t r8 asm("r8") = fd; - register uint64_t r9 asm("r9") = offset; - register uint64_t r10 asm("r10") = flags; - __asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8), - "r"(r9) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __munmap(void *addr, uint64_t size) { -#if defined(__APPLE__) -#define MUNMAP_SYSCALL 0x2000049 -#else -#define MUNMAP_SYSCALL 11 -#endif - uint64_t ret; - __asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(addr), "S"(size) - : "cc", "rcx", "r11", "memory"); - return ret; -} +struct dirent64 { + uint64_t d_ino; /* Inode number */ + int64_t d_off; /* Offset to next linux_dirent */ + unsigned short d_reclen; /* Length of this linux_dirent */ + unsigned char d_type; + char d_name[]; /* Filename (null-terminated) */ + /* length is actually (d_reclen - 2 - + offsetof(struct linux_dirent, d_name)) */ +}; -#define SIG_BLOCK 0 -#define SIG_UNBLOCK 1 -#define SIG_SETMASK 2 +/* Length of the entries in `struct utsname' is 65. */ +#define _UTSNAME_LENGTH 65 -static const uint64_t MaskAllSignals[] = {-1ULL}; +struct UtsNameTy { + char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */ + char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined + network" */ + char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */ + char version[_UTSNAME_LENGTH]; /* Operating system version */ + char machine[_UTSNAME_LENGTH]; /* Hardware identifier */ + char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */ +}; -uint64_t __sigprocmask(int how, const void *set, void *oldset) { -#if defined(__APPLE__) -#define SIGPROCMASK_SYSCALL 0x2000030 -#else -#define SIGPROCMASK_SYSCALL 14 -#endif - uint64_t ret; - register long r10 asm("r10") = sizeof(uint64_t); - __asm__ __volatile__("movq $" STRINGIFY(SIGPROCMASK_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(how), "S"(set), "d"(oldset), "r"(r10) - : "cc", "rcx", "r11", "memory"); - return ret; -} +struct timespec { + uint64_t tv_sec; /* seconds */ + uint64_t tv_nsec; /* nanoseconds */ +}; -uint64_t __getpid() { - uint64_t ret; -#if defined(__APPLE__) -#define GETPID_SYSCALL 20 +#if defined(__aarch64__) +#include "sys_aarch64.h" #else -#define GETPID_SYSCALL 39 +#include "sys_x86_64.h" #endif - __asm__ __volatile__("movq $" STRINGIFY(GETPID_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : - : "cc", "rcx", "r11", "memory"); - return ret; -} -uint64_t __exit(uint64_t code) { -#if defined(__APPLE__) -#define EXIT_SYSCALL 0x2000001 -#else -#define EXIT_SYSCALL 231 -#endif - uint64_t ret; - __asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(code) - : "cc", "rcx", "r11", "memory"); - return ret; -} +constexpr uint32_t BufSize = 10240; // Helper functions for writing strings to the .fdata file. We intentionally // avoid using libc names to make it clear it is our impl. @@ -415,219 +273,6 @@ return false; } -#if !defined(__APPLE__) -// We use a stack-allocated buffer for string manipulation in many pieces of -// this code, including the code that prints each line of the fdata file. This -// buffer needs to accomodate large function names, but shouldn't be arbitrarily -// large (dynamically allocated) for simplicity of our memory space usage. - -// Declare some syscall wrappers we use throughout this code to avoid linking -// against system libc. -uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) { - uint64_t ret; - __asm__ __volatile__("movq $2, %%rax\n" - "syscall" - : "=a"(ret) - : "D"(pathname), "S"(flags), "d"(mode) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -struct dirent { - unsigned long d_ino; /* Inode number */ - unsigned long d_off; /* Offset to next linux_dirent */ - unsigned short d_reclen; /* Length of this linux_dirent */ - char d_name[]; /* Filename (null-terminated) */ - /* length is actually (d_reclen - 2 - - offsetof(struct linux_dirent, d_name)) */ -}; - -long __getdents(unsigned int fd, dirent *dirp, size_t count) { - long ret; - __asm__ __volatile__("movq $78, %%rax\n" - "syscall" - : "=a"(ret) - : "D"(fd), "S"(dirp), "d"(count) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) { - uint64_t ret; - __asm__ __volatile__("movq $89, %%rax\n" - "syscall" - : "=a"(ret) - : "D"(pathname), "S"(buf), "d"(bufsize) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) { - uint64_t ret; - __asm__ __volatile__("movq $8, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd), "S"(pos), "d"(whence) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __ftruncate(uint64_t fd, uint64_t length) { - int ret; - __asm__ __volatile__("movq $77, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd), "S"(length) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __close(uint64_t fd) { - uint64_t ret; - __asm__ __volatile__("movq $3, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __madvise(void *addr, size_t length, int advice) { - int ret; - __asm__ __volatile__("movq $28, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(addr), "S"(length), "d"(advice) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -#define _UTSNAME_LENGTH 65 - -struct UtsNameTy { - char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */ - char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined - network" */ - char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */ - char version[_UTSNAME_LENGTH]; /* Operating system version */ - char machine[_UTSNAME_LENGTH]; /* Hardware identifier */ - char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */ -}; - -int __uname(struct UtsNameTy *Buf) { - int Ret; - __asm__ __volatile__("movq $63, %%rax\n" - "syscall\n" - : "=a"(Ret) - : "D"(Buf) - : "cc", "rcx", "r11", "memory"); - return Ret; -} - -struct timespec { - uint64_t tv_sec; /* seconds */ - uint64_t tv_nsec; /* nanoseconds */ -}; - -uint64_t __nanosleep(const timespec *req, timespec *rem) { - uint64_t ret; - __asm__ __volatile__("movq $35, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(req), "S"(rem) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int64_t __fork() { - uint64_t ret; - __asm__ __volatile__("movq $57, %%rax\n" - "syscall\n" - : "=a"(ret) - : - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __mprotect(void *addr, size_t len, int prot) { - int ret; - __asm__ __volatile__("movq $10, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(addr), "S"(len), "d"(prot) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __getppid() { - uint64_t ret; - __asm__ __volatile__("movq $110, %%rax\n" - "syscall\n" - : "=a"(ret) - : - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __setpgid(uint64_t pid, uint64_t pgid) { - int ret; - __asm__ __volatile__("movq $109, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(pid), "S"(pgid) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -uint64_t __getpgid(uint64_t pid) { - uint64_t ret; - __asm__ __volatile__("movq $121, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(pid) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __kill(uint64_t pid, int sig) { - int ret; - __asm__ __volatile__("movq $62, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(pid), "S"(sig) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -int __fsync(int fd) { - int ret; - __asm__ __volatile__("movq $74, %%rax\n" - "syscall\n" - : "=a"(ret) - : "D"(fd) - : "cc", "rcx", "r11", "memory"); - return ret; -} - -// %rdi %rsi %rdx %r10 %r8 -// sys_prctl int option unsigned unsigned unsigned unsigned -// long arg2 long arg3 long arg4 long arg5 -int __prctl(int Option, unsigned long Arg2, unsigned long Arg3, - unsigned long Arg4, unsigned long Arg5) { - int Ret; - register long rdx asm("rdx") = Arg3; - register long r8 asm("r8") = Arg5; - register long r10 asm("r10") = Arg4; - __asm__ __volatile__("movq $157, %%rax\n" - "syscall\n" - : "=a"(Ret) - : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8) - :); - return Ret; -} - -#endif - void reportError(const char *Msg, uint64_t Size) { __write(2, Msg, Size); __exit(1); @@ -644,6 +289,12 @@ reportError(Buf, Ptr - Buf); } +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 1 +#define SIG_SETMASK 2 + +static const uint64_t MaskAllSignals[] = {-1ULL}; + class Mutex { volatile bool InUse{false}; diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -40,7 +40,6 @@ // //===----------------------------------------------------------------------===// -#if defined (__x86_64__) #include "common.h" // Enables a very verbose logging to stderr useful when debugging @@ -695,12 +694,12 @@ assert(static_cast(FDdir) >= 0, "failed to open /proc/self/map_files"); - while (long Nread = __getdents(FDdir, (struct dirent *)Buf, BufSize)) { + while (long Nread = __getdents64(FDdir, (struct dirent64 *)Buf, BufSize)) { assert(static_cast(Nread) != -1, "failed to get folder entries"); - struct dirent *d; + struct dirent64 *d; for (long Bpos = 0; Bpos < Nread; Bpos += d->d_reclen) { - d = (struct dirent *)(Buf + Bpos); + d = (struct dirent64 *)(Buf + Bpos); uint64_t StartAddress, EndAddress; if (!parseAddressRange(d->d_name, StartAddress, EndAddress)) @@ -1668,6 +1667,17 @@ /// as well as the target address for the call extern "C" __attribute((naked)) void __bolt_instr_indirect_call() { +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "ldp x0, x1, [sp, #288]\n" + "bl instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#else + // clang-format off __asm__ __volatile__(SAVE_ALL "mov 0xa0(%%rsp), %%rdi\n" "mov 0x98(%%rsp), %%rsi\n" @@ -1675,10 +1685,23 @@ RESTORE_ALL "ret\n" :::); + // clang-format on +#endif } extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall() { +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "ldp x0, x1, [sp, #288]\n" + "bl instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#else + // clang-format off __asm__ __volatile__(SAVE_ALL "mov 0x98(%%rsp), %%rdi\n" "mov 0x90(%%rsp), %%rsi\n" @@ -1686,21 +1709,48 @@ RESTORE_ALL "ret\n" :::); + // clang-format on +#endif } /// This is hooking ELF's entry, it needs to save all machine state. extern "C" __attribute((naked)) void __bolt_instr_start() { +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "bl __bolt_instr_setup\n" + RESTORE_ALL + "adrp x16, __bolt_start_trampoline\n" + "add x16, x16, #:lo12:__bolt_start_trampoline\n" + "br x16\n" + :::); + // clang-format on +#else + // clang-format off __asm__ __volatile__(SAVE_ALL "call __bolt_instr_setup\n" RESTORE_ALL "jmp __bolt_start_trampoline\n" :::); + // clang-format on +#endif } /// This is hooking into ELF's DT_FINI extern "C" void __bolt_instr_fini() { - __bolt_fini_trampoline(); +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "adrp x16, __bolt_fini_trampoline\n" + "add x16, x16, #:lo12:__bolt_fini_trampoline\n" + "blr x16\n" + RESTORE_ALL + :::); + // clang-format on +#else + __asm__ __volatile__("call __bolt_fini_trampoline\n" :::); +#endif if (__bolt_instr_sleep_time == 0) { int FD = openProfile(); __bolt_instr_data_dump(FD); @@ -1752,4 +1802,3 @@ } #endif -#endif diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h new file mode 100644 --- /dev/null +++ b/bolt/runtime/sys_aarch64.h @@ -0,0 +1,394 @@ +#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64 +#define LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64 + +// Save all registers while keeping 16B stack alignment +#define SAVE_ALL \ + "stp x0, x1, [sp, #-16]!\n" \ + "stp x2, x3, [sp, #-16]!\n" \ + "stp x4, x5, [sp, #-16]!\n" \ + "stp x6, x7, [sp, #-16]!\n" \ + "stp x8, x9, [sp, #-16]!\n" \ + "stp x10, x11, [sp, #-16]!\n" \ + "stp x12, x13, [sp, #-16]!\n" \ + "stp x14, x15, [sp, #-16]!\n" \ + "stp x16, x17, [sp, #-16]!\n" \ + "stp x18, x19, [sp, #-16]!\n" \ + "stp x20, x21, [sp, #-16]!\n" \ + "stp x22, x23, [sp, #-16]!\n" \ + "stp x24, x25, [sp, #-16]!\n" \ + "stp x26, x27, [sp, #-16]!\n" \ + "stp x28, x29, [sp, #-16]!\n" \ + "str x30, [sp,#-16]!\n" +// Mirrors SAVE_ALL +#define RESTORE_ALL \ + "ldr x30, [sp], #16\n" \ + "ldp x28, x29, [sp], #16\n" \ + "ldp x26, x27, [sp], #16\n" \ + "ldp x24, x25, [sp], #16\n" \ + "ldp x22, x23, [sp], #16\n" \ + "ldp x20, x21, [sp], #16\n" \ + "ldp x18, x19, [sp], #16\n" \ + "ldp x16, x17, [sp], #16\n" \ + "ldp x14, x15, [sp], #16\n" \ + "ldp x12, x13, [sp], #16\n" \ + "ldp x10, x11, [sp], #16\n" \ + "ldp x8, x9, [sp], #16\n" \ + "ldp x6, x7, [sp], #16\n" \ + "ldp x4, x5, [sp], #16\n" \ + "ldp x2, x3, [sp], #16\n" \ + "ldp x0, x1, [sp], #16\n" + +// Anonymous namespace covering everything but our library entry point +namespace { + +// Get the difference between runtime addrress of .text section and +// static address in section header table. Can be extracted from arbitrary +// pc value recorded at runtime to get the corresponding static address, which +// in turn can be used to search for indirect call description. Needed because +// indirect call descriptions are read-only non-relocatable data. +uint64_t getTextBaseAddress() { + uint64_t DynAddr; + uint64_t StaticAddr; + __asm__ volatile("b .instr%=\n\t" + ".StaticAddr%=:\n\t" + ".dword __hot_end\n\t" + ".instr%=:\n\t" + "ldr %0, .StaticAddr%=\n\t" + "adrp %1, __hot_end\n\t" + "add %1, %1, :lo12:__hot_end\n\t" + : "=r"(StaticAddr), "=r"(DynAddr)); + return DynAddr - StaticAddr; +} + +uint64_t __read(uint64_t fd, const void *buf, uint64_t count) { + uint64_t ret; + register uint64_t x0 __asm__("x0") = fd; + register const void *x1 __asm__("x1") = buf; + register uint64_t x2 __asm__("x2") = count; + register uint32_t w8 __asm__("w8") = 63; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __write(uint64_t fd, const void *buf, uint64_t count) { + uint64_t ret; + register uint64_t x0 __asm__("x0") = fd; + register const void *x1 __asm__("x1") = buf; + register uint64_t x2 __asm__("x2") = count; + register uint32_t w8 __asm__("w8") = 64; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags, + uint64_t fd, uint64_t offset) { + void *ret; + register uint64_t x0 __asm__("x0") = addr; + register uint64_t x1 __asm__("x1") = size; + register uint64_t x2 __asm__("x2") = prot; + register uint64_t x3 __asm__("x3") = flags; + register uint64_t x4 __asm__("x4") = fd; + register uint64_t x5 __asm__("x5") = offset; + register uint32_t w8 __asm__("w8") = 222; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __munmap(void *addr, uint64_t size) { + uint64_t ret; + register void *x0 __asm__("x0") = addr; + register uint64_t x1 __asm__("x1") = size; + register uint32_t w8 __asm__("w8") = 215; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __exit(uint64_t code) { + uint64_t ret; + register uint64_t x0 __asm__("x0") = code; + register uint32_t w8 __asm__("w8") = 94; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0) + : "r"(w8) + : "cc", "memory", "x1"); + return ret; +} + +uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) { + uint64_t ret; + register int x0 __asm__("x0") = -100; + register const char *x1 __asm__("x1") = pathname; + register uint64_t x2 __asm__("x2") = flags; + register uint64_t x3 __asm__("x3") = mode; + register uint32_t w8 __asm__("w8") = 56; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(w8) + : "cc", "memory"); + return ret; +} + +long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) { + long ret; + register unsigned int x0 __asm__("x0") = fd; + register dirent64 *x1 __asm__("x1") = dirp; + register size_t x2 __asm__("x2") = count; + register uint32_t w8 __asm__("w8") = 61; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) { + uint64_t ret; + register int x0 __asm__("x0") = -100; + register const char *x1 __asm__("x1") = pathname; + register char *x2 __asm__("x2") = buf; + register size_t x3 __asm__("x3") = bufsize; + register uint32_t w8 __asm__("w8") = 78; // readlinkat + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) { + uint64_t ret; + register uint64_t x0 __asm__("x0") = fd; + register uint64_t x1 __asm__("x1") = pos; + register uint64_t x2 __asm__("x2") = whence; + register uint32_t w8 __asm__("w8") = 62; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +int __ftruncate(uint64_t fd, uint64_t length) { + int ret; + register uint64_t x0 __asm__("x0") = fd; + register uint64_t x1 __asm__("x1") = length; + register uint32_t w8 __asm__("w8") = 46; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(w8) + : "cc", "memory"); + return ret; +} + +int __close(uint64_t fd) { + int ret; + register uint64_t x0 __asm__("x0") = fd; + register uint32_t w8 __asm__("w8") = 57; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0) + : "r"(w8) + : "cc", "memory", "x1"); + return ret; +} + +int __madvise(void *addr, size_t length, int advice) { + int ret; + register void *x0 __asm__("x0") = addr; + register size_t x1 __asm__("x1") = length; + register int x2 __asm__("x2") = advice; + register uint32_t w8 __asm__("w8") = 233; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +int __uname(struct UtsNameTy *buf) { + int ret; + register UtsNameTy *x0 __asm__("x0") = buf; + register uint32_t w8 __asm__("w8") = 160; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0) + : "r"(w8) + : "cc", "memory", "x1"); + return ret; +} + +uint64_t __nanosleep(const timespec *req, timespec *rem) { + uint64_t ret; + register const timespec *x0 __asm__("x0") = req; + register timespec *x1 __asm__("x1") = rem; + register uint32_t w8 __asm__("w8") = 101; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(w8) + : "cc", "memory"); + return ret; +} + +int64_t __fork() { + uint64_t ret; + // clone instead of fork with flags + // "CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD" + register uint64_t x0 __asm__("x0") = 0x1200011; + register uint64_t x1 __asm__("x1") = 0; + register uint64_t x2 __asm__("x2") = 0; + register uint64_t x3 __asm__("x3") = 0; + register uint64_t x4 __asm__("x4") = 0; + register uint32_t w8 __asm__("w8") = 220; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(x4), "r"(w8) + : "cc", "memory"); + return ret; +} + +int __mprotect(void *addr, size_t len, int prot) { + int ret; + register void *x0 __asm__("x0") = addr; + register size_t x1 __asm__("x1") = len; + register int x2 __asm__("x2") = prot; + register uint32_t w8 __asm__("w8") = 226; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __getpid() { + uint64_t ret; + register uint32_t w8 __asm__("w8") = 172; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret) + : "r"(w8) + : "cc", "memory", "x0", "x1"); + return ret; +} + +uint64_t __getppid() { + uint64_t ret; + register uint32_t w8 __asm__("w8") = 173; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret) + : "r"(w8) + : "cc", "memory", "x0", "x1"); + return ret; +} + +int __setpgid(uint64_t pid, uint64_t pgid) { + int ret; + register uint64_t x0 __asm__("x0") = pid; + register uint64_t x1 __asm__("x1") = pgid; + register uint32_t w8 __asm__("w8") = 154; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(w8) + : "cc", "memory"); + return ret; +} + +uint64_t __getpgid(uint64_t pid) { + uint64_t ret; + register uint64_t x0 __asm__("x0") = pid; + register uint32_t w8 __asm__("w8") = 155; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0) + : "r"(w8) + : "cc", "memory", "x1"); + return ret; +} + +int __kill(uint64_t pid, int sig) { + int ret; + register uint64_t x0 __asm__("x0") = pid; + register int x1 __asm__("x1") = sig; + register uint32_t w8 __asm__("w8") = 129; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(w8) + : "cc", "memory"); + return ret; +} + +int __fsync(int fd) { + int ret; + register int x0 __asm__("x0") = fd; + register uint32_t w8 __asm__("w8") = 82; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0) + : "r"(w8) + : "cc", "memory", "x1"); + return ret; +} + +uint64_t __sigprocmask(int how, const void *set, void *oldset) { + uint64_t ret; + register int x0 __asm__("x0") = how; + register const void *x1 __asm__("x1") = set; + register void *x2 __asm__("x2") = oldset; + register long x3 asm("x3") = 8; + register uint32_t w8 __asm__("w8") = 135; + __asm__ __volatile__("svc #0\n" + "mov %0, x0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(w8) + : "cc", "memory"); + return ret; +} + +int __prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) { + int ret; + register int x0 __asm__("x0") = option; + register unsigned long x1 __asm__("x1") = arg2; + register unsigned long x2 __asm__("x2") = arg3; + register unsigned long x3 __asm__("x3") = arg4; + register unsigned long x4 __asm__("x4") = arg5; + register uint32_t w8 __asm__("w8") = 167; + __asm__ __volatile__("svc #0\n" + "mov %w0, w0" + : "=r"(ret), "+r"(x0), "+r"(x1) + : "r"(x2), "r"(x3), "r"(x4), "r"(w8) + : "cc", "memory"); + return ret; +} + +} // anonymous namespace + +#endif diff --git a/bolt/runtime/common.h b/bolt/runtime/sys_x86_64.h copy from bolt/runtime/common.h copy to bolt/runtime/sys_x86_64.h --- a/bolt/runtime/common.h +++ b/bolt/runtime/sys_x86_64.h @@ -1,48 +1,5 @@ -//===- bolt/runtime/common.h ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if !defined(__x86_64__) -#error "For x86_64 only" -#endif - -#if defined(__linux__) - -#include -#include - -#include "config.h" - -#ifdef HAVE_ELF_H -#include -#endif - -#elif defined(__APPLE__) - -typedef __SIZE_TYPE__ size_t; -#define __SSIZE_TYPE__ \ - __typeof__(_Generic((__SIZE_TYPE__)0, unsigned long long int \ - : (long long int)0, unsigned long int \ - : (long int)0, unsigned int \ - : (int)0, unsigned short \ - : (short)0, unsigned char \ - : (signed char)0)) -typedef __SSIZE_TYPE__ ssize_t; - -typedef unsigned long long uint64_t; -typedef unsigned uint32_t; -typedef unsigned char uint8_t; - -typedef long long int64_t; -typedef int int32_t; - -#else -#error "For Linux or MacOS only" -#endif +#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_X86_64 +#define LLVM_TOOLS_LLVM_BOLT_SYS_X86_64 // Save all registers while keeping 16B stack alignment #define SAVE_ALL \ @@ -62,7 +19,6 @@ "push %%r14\n" \ "push %%r15\n" \ "sub $8, %%rsp\n" - // Mirrors SAVE_ALL #define RESTORE_ALL \ "add $8, %%rsp\n" \ @@ -82,87 +38,6 @@ "pop %%rbx\n" \ "pop %%rax\n" -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ -#define PROT_GROWSDOWN \ - 0x01000000 /* Extend change to start of \ - growsdown vma (mprotect only). */ -#define PROT_GROWSUP \ - 0x02000000 /* Extend change to start of \ - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x01 /* Share changes. */ -#define MAP_PRIVATE 0x02 /* Changes are private. */ -#define MAP_FIXED 0x10 /* Interpret addr exactly. */ - -#if defined(__APPLE__) -#define MAP_ANONYMOUS 0x1000 -#else -#define MAP_ANONYMOUS 0x20 -#endif - -#define MAP_FAILED ((void *)-1) - -#define SEEK_SET 0 /* Seek from beginning of file. */ -#define SEEK_CUR 1 /* Seek from current position. */ -#define SEEK_END 2 /* Seek from end of file. */ - -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 64 -#define O_TRUNC 512 -#define O_APPEND 1024 - -// Functions that are required by freestanding environment. Compiler may -// generate calls to these implicitly. -extern "C" { -void *memcpy(void *Dest, const void *Src, size_t Len) { - uint8_t *d = static_cast(Dest); - const uint8_t *s = static_cast(Src); - while (Len--) - *d++ = *s++; - return Dest; -} - -void *memmove(void *Dest, const void *Src, size_t Len) { - uint8_t *d = static_cast(Dest); - const uint8_t *s = static_cast(Src); - if (d < s) { - while (Len--) - *d++ = *s++; - } else { - s += Len - 1; - d += Len - 1; - while (Len--) - *d-- = *s--; - } - - return Dest; -} - -void *memset(void *Buf, int C, size_t Size) { - char *S = (char *)Buf; - for (size_t I = 0; I < Size; ++I) - *S++ = C; - return Buf; -} - -int memcmp(const void *s1, const void *s2, size_t n) { - const uint8_t *c1 = static_cast(s1); - const uint8_t *c2 = static_cast(s2); - for (; n--; c1++, c2++) { - if (*c1 != *c2) - return *c1 < *c2 ? -1 : 1; - } - return 0; -} -} // extern "C" - -// Anonymous namespace covering everything but our library entry point namespace { // Get the difference between runtime addrress of .text section and @@ -179,8 +54,6 @@ return DynAddr - StaticAddr; } -constexpr uint32_t BufSize = 10240; - #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) @@ -192,7 +65,7 @@ #define READ_SYSCALL 0 #endif __asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n" - "syscall\n" + "syscall\n" : "=a"(ret) : "D"(fd), "S"(buf), "d"(count) : "cc", "rcx", "r11", "memory"); @@ -207,7 +80,7 @@ #define WRITE_SYSCALL 1 #endif __asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n" - "syscall\n" + "syscall\n" : "=a"(ret) : "D"(fd), "S"(buf), "d"(count) : "cc", "rcx", "r11", "memory"); @@ -226,7 +99,7 @@ register uint64_t r9 asm("r9") = offset; register uint64_t r10 asm("r10") = flags; __asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n" - "syscall\n" + "syscall\n" : "=a"(ret) : "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8), "r"(r9) @@ -242,19 +115,13 @@ #endif uint64_t ret; __asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n" - "syscall\n" + "syscall\n" : "=a"(ret) : "D"(addr), "S"(size) : "cc", "rcx", "r11", "memory"); return ret; } -#define SIG_BLOCK 0 -#define SIG_UNBLOCK 1 -#define SIG_SETMASK 2 - -static const uint64_t MaskAllSignals[] = {-1ULL}; - uint64_t __sigprocmask(int how, const void *set, void *oldset) { #if defined(__APPLE__) #define SIGPROCMASK_SYSCALL 0x2000030 @@ -294,127 +161,13 @@ #endif uint64_t ret; __asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n" - "syscall\n" + "syscall\n" : "=a"(ret) : "D"(code) : "cc", "rcx", "r11", "memory"); return ret; } -// Helper functions for writing strings to the .fdata file. We intentionally -// avoid using libc names to make it clear it is our impl. - -/// Write number Num using Base to the buffer in OutBuf, returns a pointer to -/// the end of the string. -char *intToStr(char *OutBuf, uint64_t Num, uint32_t Base) { - const char *Chars = "0123456789abcdef"; - char Buf[21]; - char *Ptr = Buf; - while (Num) { - *Ptr++ = *(Chars + (Num % Base)); - Num /= Base; - } - if (Ptr == Buf) { - *OutBuf++ = '0'; - return OutBuf; - } - while (Ptr != Buf) - *OutBuf++ = *--Ptr; - - return OutBuf; -} - -/// Copy Str to OutBuf, returns a pointer to the end of the copied string -char *strCopy(char *OutBuf, const char *Str, int32_t Size = BufSize) { - while (*Str) { - *OutBuf++ = *Str++; - if (--Size <= 0) - return OutBuf; - } - return OutBuf; -} - -/// Compare two strings, at most Num bytes. -int strnCmp(const char *Str1, const char *Str2, size_t Num) { - while (Num && *Str1 && (*Str1 == *Str2)) { - Num--; - Str1++; - Str2++; - } - if (Num == 0) - return 0; - return *(unsigned char *)Str1 - *(unsigned char *)Str2; -} - -uint32_t strLen(const char *Str) { - uint32_t Size = 0; - while (*Str++) - ++Size; - return Size; -} - -void *strStr(const char *const Haystack, const char *const Needle) { - int j = 0; - - for (int i = 0; i < strLen(Haystack); i++) { - if (Haystack[i] == Needle[0]) { - for (j = 1; j < strLen(Needle); j++) { - if (Haystack[i + j] != Needle[j]) - break; - } - if (j == strLen(Needle)) - return (void *)&Haystack[i]; - } - } - return nullptr; -} - -void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) { - char Buf[BufSize]; - char *Ptr = Buf; - Ptr = strCopy(Ptr, Msg, BufSize - 23); - Ptr = intToStr(Ptr, Num, Base); - Ptr = strCopy(Ptr, "\n"); - __write(2, Buf, Ptr - Buf); -} - -void report(const char *Msg) { __write(2, Msg, strLen(Msg)); } - -unsigned long hexToLong(const char *Str, char Terminator = '\0') { - unsigned long Res = 0; - while (*Str != Terminator) { - Res <<= 4; - if ('0' <= *Str && *Str <= '9') - Res += *Str++ - '0'; - else if ('a' <= *Str && *Str <= 'f') - Res += *Str++ - 'a' + 10; - else if ('A' <= *Str && *Str <= 'F') - Res += *Str++ - 'A' + 10; - else - return 0; - } - return Res; -} - -/// Starting from character at \p buf, find the longest consecutive sequence -/// of digits (0-9) and convert it to uint32_t. The converted value -/// is put into \p ret. \p end marks the end of the buffer to avoid buffer -/// overflow. The function \returns whether a valid uint32_t value is found. -/// \p buf will be updated to the next character right after the digits. -static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) { - uint64_t Result = 0; - const char *OldBuf = Buf; - while (Buf < End && ((*Buf) >= '0' && (*Buf) <= '9')) { - Result = Result * 10 + (*Buf) - '0'; - ++Buf; - } - if (OldBuf != Buf && Result <= 0xFFFFFFFFu) { - Ret = static_cast(Result); - return true; - } - return false; -} - #if !defined(__APPLE__) // We use a stack-allocated buffer for string manipulation in many pieces of // this code, including the code that prints each line of the fdata file. This @@ -433,18 +186,9 @@ return ret; } -struct dirent { - unsigned long d_ino; /* Inode number */ - unsigned long d_off; /* Offset to next linux_dirent */ - unsigned short d_reclen; /* Length of this linux_dirent */ - char d_name[]; /* Filename (null-terminated) */ - /* length is actually (d_reclen - 2 - - offsetof(struct linux_dirent, d_name)) */ -}; - -long __getdents(unsigned int fd, dirent *dirp, size_t count) { +long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) { long ret; - __asm__ __volatile__("movq $78, %%rax\n" + __asm__ __volatile__("movq $217, %%rax\n" "syscall" : "=a"(ret) : "D"(fd), "S"(dirp), "d"(count) @@ -502,18 +246,6 @@ return ret; } -#define _UTSNAME_LENGTH 65 - -struct UtsNameTy { - char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */ - char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined - network" */ - char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */ - char version[_UTSNAME_LENGTH]; /* Operating system version */ - char machine[_UTSNAME_LENGTH]; /* Hardware identifier */ - char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */ -}; - int __uname(struct UtsNameTy *Buf) { int Ret; __asm__ __volatile__("movq $63, %%rax\n" @@ -524,11 +256,6 @@ return Ret; } -struct timespec { - uint64_t tv_sec; /* seconds */ - uint64_t tv_nsec; /* nanoseconds */ -}; - uint64_t __nanosleep(const timespec *req, timespec *rem) { uint64_t ret; __asm__ __volatile__("movq $35, %%rax\n" @@ -628,71 +355,6 @@ #endif -void reportError(const char *Msg, uint64_t Size) { - __write(2, Msg, Size); - __exit(1); -} - -void assert(bool Assertion, const char *Msg) { - if (Assertion) - return; - char Buf[BufSize]; - char *Ptr = Buf; - Ptr = strCopy(Ptr, "Assertion failed: "); - Ptr = strCopy(Ptr, Msg, BufSize - 40); - Ptr = strCopy(Ptr, "\n"); - reportError(Buf, Ptr - Buf); -} - -class Mutex { - volatile bool InUse{false}; - -public: - bool acquire() { return !__atomic_test_and_set(&InUse, __ATOMIC_ACQUIRE); } - void release() { __atomic_clear(&InUse, __ATOMIC_RELEASE); } -}; - -/// RAII wrapper for Mutex -class Lock { - Mutex &M; - uint64_t SignalMask[1] = {}; - -public: - Lock(Mutex &M) : M(M) { - __sigprocmask(SIG_BLOCK, MaskAllSignals, SignalMask); - while (!M.acquire()) { - } - } - - ~Lock() { - M.release(); - __sigprocmask(SIG_SETMASK, SignalMask, nullptr); - } -}; - -/// RAII wrapper for Mutex -class TryLock { - Mutex &M; - bool Locked = false; - -public: - TryLock(Mutex &M) : M(M) { - int Retry = 100; - while (--Retry && !M.acquire()) - ; - if (Retry) - Locked = true; - } - bool isLocked() { return Locked; } - - ~TryLock() { - if (isLocked()) - M.release(); - } -}; - -inline uint64_t alignTo(uint64_t Value, uint64_t Align) { - return (Value + Align - 1) / Align * Align; -} - } // anonymous namespace + +#endif