diff --git a/bolt/include/bolt/Passes/Hugify.h b/bolt/include/bolt/Passes/Hugify.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Passes/Hugify.h @@ -0,0 +1,29 @@ +//===- bolt/Passes/Hugify.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_HUGIFY_H +#define BOLT_PASSES_HUGIFY_H + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class HugePage : public BinaryFunctionPass { +public: + HugePage(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} + + void runOnFunctions(BinaryContext &BC) override; + + const char *getName() const override { return "HugePage"; } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h --- a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h +++ b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h @@ -22,13 +22,11 @@ public: /// Add custom section names generated by the runtime libraries to \p /// SecNames. 
- void addRuntimeLibSections(std::vector &SecNames) const final { - SecNames.push_back(".bolt.hugify.entries"); - } + void addRuntimeLibSections(std::vector &SecNames) const final {} void adjustCommandLineOptions(const BinaryContext &BC) const final; - void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final; + void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final {} void link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) final; diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -73,6 +73,12 @@ // dbgs() for output within DEBUG(). extern llvm::cl::opt Verbosity; +// The hugify type is used to add extra padding for .text section +// for kernel version < 5.10 +enum HugifyType { Hugify_None, Hugify_Kernel_5_10, Hugify_Kernel_4_18 }; + +extern llvm::cl::opt Hugify; + /// Return true if we should process all functions in the binary. bool processAllFunctions(); diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -15,6 +15,7 @@ FrameOptimizer.cpp HFSort.cpp HFSortPlus.cpp + Hugify.cpp IdenticalCodeFolding.cpp IndirectCallPromotion.cpp Inliner.cpp diff --git a/bolt/lib/Passes/Hugify.cpp b/bolt/lib/Passes/Hugify.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Passes/Hugify.cpp @@ -0,0 +1,50 @@ +//===--- bolt/Passes/Hugify.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/Hugify.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "bolt-hugify" + +using namespace llvm; + +namespace llvm { +namespace bolt { + +void HugePage::runOnFunctions(BinaryContext &BC) { + auto *RtLibrary = BC.getRuntimeLibrary(); + if (!RtLibrary || !BC.isELF() || !BC.StartFunctionAddress) { + return; + } + + auto createSimpleFunction = + [&](std::string Title, std::vector Instrs) -> BinaryFunction * { + BinaryFunction *Func = BC.createInjectedBinaryFunction(Title); + + std::vector> BBs; + BBs.emplace_back(Func->createBasicBlock(nullptr)); + BBs.back()->addInstructions(Instrs.begin(), Instrs.end()); + BBs.back()->setCFIState(0); + BBs.back()->setOffset(BinaryBasicBlock::INVALID_OFFSET); + + Func->insertBasicBlocks(nullptr, std::move(BBs), + /*UpdateLayout=*/true, + /*UpdateCFIState=*/false); + Func->updateState(BinaryFunction::State::CFG_Finalized); + return Func; + }; + + const BinaryFunction *const Start = + BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); + assert(Start && "Entry point function not found"); + const MCSymbol *StartSym = Start->getSymbol(); + createSimpleFunction("__bolt_hugify_start_program", + BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get())); +} +} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -13,6 +13,7 @@ #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" #include "bolt/Passes/FrameOptimizer.h" +#include "bolt/Passes/Hugify.h" #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" @@ -333,6 +334,8 @@ if (opts::Instrument) 
Manager.registerPass(std::make_unique(NeverPrint)); + else if (opts::Hugify) + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -75,7 +75,7 @@ extern cl::opt AlignMacroOpFusion; extern cl::list HotTextMoveSections; -extern cl::opt Hugify; +extern cl::opt Hugify; extern cl::opt Instrument; extern cl::opt JumpTables; extern cl::list ReorderData; @@ -485,6 +485,10 @@ NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); + // Hugify: Additional huge page from left side + if (opts::Hugify == opts::HugifyType::Hugify_Kernel_4_18) + NextAvailableAddress += BC->PageAlign; + if (!opts::UseGnuStack) { // This is where the black magic happens. Creating PHDR table in a segment // other than that containing ELF header is tricky. Some loaders and/or @@ -3630,6 +3634,10 @@ Address = alignTo(Address, Section->getAlignment()); Section->setOutputAddress(Address); Address += Section->getOutputSize(); + + // Hugify: Additional huge page from right side + if (opts::Hugify == opts::HugifyType::Hugify_Kernel_4_18) + Address = alignTo(Address, Section->getAlignment()); } // Make sure we allocate enough space for huge pages. diff --git a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp --- a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp @@ -26,13 +26,6 @@ extern cl::opt HotText; -cl::opt - Hugify("hugify", - cl::desc("Automatically put hot code on 2MB page(s) (hugify) at " - "runtime. 
No manual call to hugify is needed in the binary " - "(which is what --hot-text relies on)."), - cl::cat(BoltOptCategory)); - static cl::opt RuntimeHugifyLib( "runtime-hugify-lib", cl::desc("specify file name of the runtime hugify library"), @@ -60,35 +53,6 @@ } } -void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { - const BinaryFunction *StartFunction = - BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress)); - assert(!StartFunction->isFragment() && "expected main function fragment"); - if (!StartFunction) { - errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; - exit(1); - } - - const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/false, - /*IsText=*/false, - /*IsAllocatable=*/true); - MCSectionELF *Section = - BC.Ctx->getELFSection(".bolt.hugify.entries", ELF::SHT_PROGBITS, Flags); - - // __bolt_hugify_init_ptr stores the poiter the hugify library needs to - // jump to after finishing the init code. - MCSymbol *InitPtr = BC.Ctx->getOrCreateSymbol("__bolt_hugify_init_ptr"); - - Section->setAlignment(llvm::Align(BC.RegularPageSize)); - Streamer.switchSection(Section); - - Streamer.emitLabel(InitPtr); - Streamer.emitSymbolAttribute(InitPtr, MCSymbolAttr::MCSA_Global); - Streamer.emitValue( - MCSymbolRefExpr::create(StartFunction->getSymbol(), *(BC.Ctx)), - /*Size=*/8); -} - void HugifyRuntimeLibrary::link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) { diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -191,6 +191,23 @@ cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory), cl::sub(cl::SubCommand::getAll())); +cl::opt + Hugify("hugify", + cl::desc("Automatically put hot code on 2MB page(s) (hugify) at " + "runtime. No manual call to hugify is needed in the binary " + "(which is what --hot-text relies on). 
For linux kernel < 5.10 " + "extra padding is needed for .text section from left and " + "rigth sides, more details: https://reviews.llvm.org/D129107"), + cl::init(Hugify_None), + cl::values(clEnumValN(Hugify_Kernel_5_10, + "5.10", + "no extra padding for .text section"), + clEnumValN(Hugify_Kernel_4_18, + "4.18", + "adds extra padding for .text section")), + cl::ZeroOrMore, + cl::cat(BoltOptCategory)); + bool processAllFunctions() { if (opts::AggregateOnly) return false; diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -25,10 +25,11 @@ -fno-exceptions -fno-rtti -fno-stack-protector - -mno-sse) + -mno-sse + -fPIE) # Don't let the compiler think it can create calls to standard libs -target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) +target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -79,7 +79,8 @@ // Functions that are required by freestanding environment. Compiler may // generate calls to these implicitly. 
extern "C" {
-void *memcpy(void *Dest, const void *Src, size_t Len) {
+void __attribute__((noinline)) *
+    memcpy(void *Dest, const void *Src, size_t Len) {
   uint8_t *d = static_cast<uint8_t *>(Dest);
   const uint8_t *s = static_cast<const uint8_t *>(Src);
   while (Len--)
@@ -283,6 +284,22 @@
   return Size;
 }
 
+/// Return a pointer to the first occurrence of \p Needle in \p Haystack, or
+/// nullptr if there is none. An empty \p Needle never matches.
+void *strStr(const char *const Haystack, const char *const Needle) {
+  // Measure both strings once instead of on every loop iteration.
+  const size_t HaystackLen = strLen(Haystack);
+  const size_t NeedleLen = strLen(Needle);
+  for (size_t I = 0; NeedleLen && I + NeedleLen <= HaystackLen; ++I) {
+    size_t J = 0;
+    while (J < NeedleLen && Haystack[I + J] == Needle[J])
+      ++J;
+    if (J == NeedleLen)
+      return (void *)&Haystack[I];
+  }
+  return nullptr;
+}
+
 void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) {
   char Buf[BufSize];
   char *Ptr = Buf;
@@ -310,6 +327,25 @@
   return Res;
 }
 
+/// Starting from character at \p Buf, find the longest consecutive sequence
+/// of digits (0-9) and convert it to uint32_t. The converted value
+/// is put into \p Ret. \p End marks the end of the buffer to avoid buffer
+/// overflow. The function \returns whether a valid uint32_t value is found.
+/// \p Buf will be updated to the next character right after the digits.
+static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) {
+  uint64_t Result = 0;
+  const char *OldBuf = Buf;
+  while (Buf < End && ((*Buf) >= '0' && (*Buf) <= '9')) {
+    // Stop accumulating once out of range so long digit runs cannot wrap.
+    Result = Result > 0xFFFFFFFFu ? Result : Result * 10 + (*Buf) - '0';
+    ++Buf;
+  }
+  if (OldBuf == Buf || Result > 0xFFFFFFFFu)
+    return false;
+  Ret = static_cast<uint32_t>(Result);
+  return true;
+}
+
 #if !defined(__APPLE__)
 // We use a stack-allocated buffer for string manipulation in many pieces of
 // this code, including the code that prints each line of the fdata file. This
@@ -387,6 +423,28 @@
   return ret;
 }
 
+#define _UTSNAME_LENGTH 65
+
+struct UtsNameTy {
+  char sysname[_UTSNAME_LENGTH];  /* Operating system name (e.g., "Linux") */
+  char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined
+                      network" */
+  char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */
+  char version[_UTSNAME_LENGTH]; /* Operating system version */
+  char machine[_UTSNAME_LENGTH]; /* Hardware identifier */
+  char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */
+};
+
+int __uname(struct UtsNameTy *Buf) {
+  int Ret;
+  __asm__ __volatile__("movq $63, %%rax\n"
+                       "syscall\n"
+                       : "=a"(Ret)
+                       : "D"(Buf)
+                       : "cc", "rcx", "r11", "memory");
+  return Ret;
+}
+
 struct timespec {
   uint64_t tv_sec;  /* seconds */
   uint64_t tv_nsec; /* nanoseconds */
@@ -482,6 +540,23 @@
   return ret;
 }
 
+//              %rdi        %rsi       %rdx       %r10       %r8
+// sys_prctl    int option  unsigned   unsigned   unsigned   unsigned
+//                          long arg2  long arg3  long arg4  long arg5
+int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
+            unsigned long Arg4, unsigned long Arg5) {
+  int Ret; // raw syscall result as left in %rax
+  register long rdx asm("rdx") = Arg3;
+  register long r8 asm("r8") = Arg5;
+  register long r10 asm("r10") = Arg4;
+  __asm__ __volatile__("movq $157, %%rax\n"
+                       "syscall\n"
+                       : "=a"(Ret)
+                       : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
+                       : "cc", "rcx", "r11", "memory"); // syscall clobbers
+  return Ret;
+}
+
 #endif
 
 void reportError(const char *Msg, uint64_t Size) {
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -1,129 +1,184 @@
-//===- bolt/runtime/hugify.cpp --------------------------------------------===//
+//===- bolt/runtime/hugify.cpp -------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
 
 #if defined (__x86_64__)
 #if !defined(__APPLE__)
 
 #include "common.h"
-#include <sys/mman.h>
 
 // Enables a very verbose logging to stderr useful when debugging
-//#define ENABLE_DEBUG
+// #define ENABLE_DEBUG
 
-// Function pointers to init routines in the binary, so we can resume
-// regular execution of the function that we hooked.
-extern void (*__bolt_hugify_init_ptr)();
+// Function contains trampoline to _start,
+// so we can resume regular execution of the function that we hooked.
+extern void __bolt_hugify_start_program();
 
 // The __hot_start and __hot_end symbols set by Bolt. We use them to figure
 // out the rage for marking huge pages.
 extern uint64_t __hot_start;
 extern uint64_t __hot_end;
 
-#ifdef MADV_HUGEPAGE
+/// Parse the running kernel's release string ("major.minor.release") into
+/// \p Val[0..2]. Entries that cannot be parsed are left untouched.
+static void getKernelVersion(uint32_t *Val) {
+  struct UtsNameTy UtsName;
+  // If uname fails the release field is not filled in, so do not parse it.
+  if (__uname(&UtsName) != 0)
+    return;
+  const char *Buf = UtsName.release;
+  const char *End = Buf + strLen(Buf);
+  const char Delims[2][2] = {".", "."};
+
+  for (unsigned i = 0; i < 3; ++i) {
+    if (!scanUInt32(Buf, End, Val[i]))
+      return;
+    if (i < sizeof(Delims) / sizeof(Delims[0])) {
+      const char *Ptr = Delims[i];
+      while (*Ptr != '\0') {
+        if (*Ptr != *Buf)
+          return;
+        ++Ptr;
+        ++Buf;
+      }
+    }
+  }
+}
+
 /// Check whether the kernel supports THP via corresponding sysfs entry.
-static bool has_pagecache_thp_support() {
-  char buf[256] = {0};
-  const char *madviseStr = "always [madvise] never";
+/// Only kernels 5.10 and newer (this includes 6.x) are reported as supported.
+static bool hasPagecacheTHPSupport() {
+  char Buf[64];
+  const uint64_t MadviseOptions = 2;
+  const char *const MadviseOpt[MadviseOptions] = {"[always]", "[madvise]"};
 
-  int fd = __open("/sys/kernel/mm/transparent_hugepage/enabled",
+  int FD = __open("/sys/kernel/mm/transparent_hugepage/enabled",
                   0 /* O_RDONLY */, 0);
-  if (fd < 0)
+  if (FD < 0)
     return false;
 
-  size_t res = __read(fd, buf, 256);
-  if (res < 0)
+  memset(Buf, 0, sizeof(Buf));
+  const int64_t Res = __read(FD, Buf, sizeof(Buf) - 1); // keep the final NUL
+  if (Res < 0)
     return false;
 
-  int cmp = strnCmp(buf, madviseStr, strLen(madviseStr));
-  return cmp == 0;
-}
+  struct KernelVersionTy {
+    uint32_t major;
+    uint32_t minor;
+    uint32_t release;
+  };
 
-static void hugify_for_old_kernel(uint8_t *from, uint8_t *to) {
-  size_t size = to - from;
+  KernelVersionTy KernelVersion = {0, 0, 0}; // stays 0.0.0 if parsing fails
 
-  uint8_t *mem = reinterpret_cast<uint8_t *>(
-      __mmap(0, size, 0x3 /* PROT_READ | PROT_WRITE*/,
-             0x22 /* MAP_PRIVATE | MAP_ANONYMOUS*/, -1, 0));
+  getKernelVersion((uint32_t *)&KernelVersion);
+
+  const bool IsNewEnough = KernelVersion.major > 5 ||
+      (KernelVersion.major == 5 && KernelVersion.minor >= 10);
+  for (unsigned int i = 0; IsNewEnough && i < MadviseOptions; i++) {
+    if (strStr(Buf, MadviseOpt[i]))
+      return true;
+  }
+  return false;
+}
 
-  if (mem == (void *)MAP_FAILED) {
-    char msg[] = "Could not allocate memory for text move\n";
-    reportError(msg, sizeof(msg));
+/// Save [From, To), remap the aligned page range as huge, copy the code back.
+static void hugifyForOldKernel(uint8_t *From, uint8_t *To,
+                               uint8_t *FromAlignedPage,
+                               uint8_t *ToAlignedPage) {
+  const size_t HugePageBytes = 2L * 1024 * 1024;
+  const size_t Size = To - From;
+  const size_t SizeHugePageAligned =
+      Size + (HugePageBytes - ((intptr_t)Size & (HugePageBytes - 1)));
+  uint8_t *Mem = reinterpret_cast<uint8_t *>(
+      __mmap(0, SizeHugePageAligned, 0x3 /* PROT_READ | PROT_WRITE */,
+             0x22 /* MAP_PRIVATE | MAP_ANONYMOUS */, -1, 0));
+
+  if (Mem == ((void *)-1) /* MAP_FAILED */) {
+    char Msg[] = "[hugify] could not allocate memory for text move\n";
+    reportError(Msg, sizeof(Msg));
   }
 #ifdef ENABLE_DEBUG
-  reportNumber("Allocated temporary space: ", (uint64_t)mem, 16);
+  reportNumber("[hugify] allocated temporary address: ", (uint64_t)Mem, 16);
+  reportNumber(
+      "[hugify] allocated aligned size: ", (uint64_t)SizeHugePageAligned, 16);
+  reportNumber("[hugify] allocated size: ", (uint64_t)Size, 16);
 #endif
 
-  // Copy the hot code to a temproary location.
-  memcpy(mem, from, size);
+  // Copy the hot code to a temporary location.
+  memcpy(Mem, From, Size);
 
+  __prctl(41 /* PR_SET_THP_DISABLE */, 0, 0, 0, 0);
   // Maps out the existing hot code.
-  if (__mmap(reinterpret_cast<uint64_t>(from), size,
-             PROT_READ | PROT_WRITE | PROT_EXEC,
-             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1,
-             0) == (void *)MAP_FAILED) {
-    char msg[] = "failed to mmap memory for large page move terminating\n";
-    reportError(msg, sizeof(msg));
+  if (__mmap(reinterpret_cast<uint64_t>(FromAlignedPage),
+             ToAlignedPage - FromAlignedPage, 0x3 /* PROT_READ | PROT_WRITE */,
+             0x32 /* MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE */, -1,
+             0) == ((void *)-1) /*MAP_FAILED*/) {
+    char Msg[] =
+        "[hugify] failed to mmap memory for large page move terminating\n";
+    reportError(Msg, sizeof(Msg));
   }
 
   // Mark the hot code page to be huge page.
-  if (__madvise(from, size, MADV_HUGEPAGE) == -1) {
-    char msg[] = "failed to allocate large page\n";
-    reportError(msg, sizeof(msg));
+  if (__madvise(FromAlignedPage, ToAlignedPage - FromAlignedPage,
+                14 /* MADV_HUGEPAGE */) == -1) {
+    char Msg[] = "[hugify] failed to allocate large page\n";
+    reportError(Msg, sizeof(Msg));
   }
 
   // Copy the hot code back.
-  memcpy(from, mem, size);
+  memcpy(From, Mem, Size); // only Size bytes were saved; more would overrun
 
   // Change permission back to read-only, ignore failure
-  __mprotect(from, size, PROT_READ | PROT_EXEC);
+  __mprotect(FromAlignedPage, ToAlignedPage - FromAlignedPage,
+             0x5 /* PROT_READ | PROT_EXEC */);
 
-  __munmap(mem, size);
+  __munmap(Mem, SizeHugePageAligned);
 }
 #endif
 
 extern "C" void __bolt_hugify_self_impl() {
-#ifdef MADV_HUGEPAGE
-  uint8_t *hotStart = (uint8_t *)&__hot_start;
-  uint8_t *hotEnd = (uint8_t *)&__hot_end;
+  uint8_t *HotStart = (uint8_t *)&__hot_start;
+  uint8_t *HotEnd = (uint8_t *)&__hot_end;
   // Make sure the start and end are aligned with huge page address
-  const size_t hugePageBytes = 2L * 1024 * 1024;
-  uint8_t *from = hotStart - ((intptr_t)hotStart & (hugePageBytes - 1));
-  uint8_t *to = hotEnd + (hugePageBytes - 1);
-  to -= (intptr_t)to & (hugePageBytes - 1);
+  const size_t HugePageBytes = 2L * 1024 * 1024;
+  uint8_t *From = HotStart - ((intptr_t)HotStart & (HugePageBytes - 1));
+  uint8_t *To = HotEnd + (HugePageBytes - 1);
+  To -= (intptr_t)To & (HugePageBytes - 1);
 
 #ifdef ENABLE_DEBUG
-  reportNumber("[hugify] hot start: ", (uint64_t)hotStart, 16);
-  reportNumber("[hugify] hot end: ", (uint64_t)hotEnd, 16);
-  reportNumber("[hugify] aligned huge page from: ", (uint64_t)from, 16);
-  reportNumber("[hugify] aligned huge page to: ", (uint64_t)to, 16);
+  reportNumber("[hugify] hot start: ", (uint64_t)HotStart, 16);
+  reportNumber("[hugify] hot end: ", (uint64_t)HotEnd, 16);
+  reportNumber("[hugify] aligned huge page from: ", (uint64_t)From, 16);
+  reportNumber("[hugify] aligned huge page to: ", (uint64_t)To, 16);
 #endif
 
-  if (!has_pagecache_thp_support()) {
-    hugify_for_old_kernel(from, to);
+  if (!hasPagecacheTHPSupport()) {
+#ifdef ENABLE_DEBUG
+    report("[hugify] workaround with memory alignment for kernel < 5.10\n");
+#endif
+    hugifyForOldKernel(HotStart, HotEnd, From, To);
     return;
   }
 
-  if (__madvise(from, (to - from), MADV_HUGEPAGE) == -1) {
-    char msg[] = "failed to allocate large page\n";
+  if (__madvise(From, (To - From), 14 /* MADV_HUGEPAGE */) == -1) {
+    char Msg[] = "[hugify] failed to allocate large page\n";
     // TODO: allow user to control the failure behavior.
-    reportError(msg, sizeof(msg));
+    reportError(Msg, sizeof(Msg));
   }
-#endif
 }
 
 /// This is hooking ELF's entry, it needs to save all machine state.
 extern "C" __attribute((naked)) void __bolt_hugify_self() {
-  __asm__ __volatile__(SAVE_ALL
-                       "call __bolt_hugify_self_impl\n"
-                       RESTORE_ALL
-                       "jmp *__bolt_hugify_init_ptr(%%rip)\n"
-                       :::);
-}
-
+#if defined(__x86_64__)
+  __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
+                                "jmp __bolt_hugify_start_program\n" ::
+                                    :);
+#else
+  exit(1);
 #endif
+}
 #endif
diff --git a/bolt/test/runtime/X86/user-func-reorder.c b/bolt/test/runtime/X86/user-func-reorder.c
--- a/bolt/test/runtime/X86/user-func-reorder.c
+++ b/bolt/test/runtime/X86/user-func-reorder.c
@@ -30,7 +30,7 @@
 
 RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
 RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
-RUN:   --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN:   --hugify=5.10 --function-order=%p/Inputs/user_func_order.txt -o %t
 RUN: llvm-nm --numeric-sort --print-armap %t | \
 RUN:   FileCheck %s -check-prefix=CHECK-NM
 RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT