Index: lld/ELF/CMakeLists.txt =================================================================== --- lld/ELF/CMakeLists.txt +++ lld/ELF/CMakeLists.txt @@ -32,6 +32,7 @@ InputFiles.cpp InputSection.cpp LTO.cpp + LinkerPropeller.cpp LinkerScript.cpp MapFile.cpp MarkLive.cpp Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -108,6 +108,20 @@ llvm::StringRef optRemarksPasses; llvm::StringRef optRemarksFormat; llvm::StringRef progName; + llvm::StringRef propeller; + llvm::StringRef propellerBBOrderFile; + std::vector propellerBBOrder; + llvm::StringRef propellerDumpSymbolOrder; + double propellerFallthroughWeight; + double propellerForwardJumpWeight; + double propellerBackwardJumpWeight; + uint64_t propellerClusterMergeSizeThreshold; + uint64_t propellerForwardJumpDistance; + uint64_t propellerBackwardJumpDistance; + uint64_t propellerChainSplitThreshold; + std::vector propellerDumpCfgs; + std::vector propellerDebugSymbols; + std::vector propellerOpts; llvm::StringRef printSymbolOrder; llvm::StringRef soName; llvm::StringRef sysroot; @@ -179,6 +193,12 @@ bool pie; bool printGcSections; bool printIcfSections; + bool propellerKeepNamedSymbols; + bool propellerPrintStats; + bool propellerReorderIP = false; + bool propellerReorderBlocks; + bool propellerReorderFuncs; + bool propellerSplitFuncs; bool relocatable; bool relrPackDynRelocs; bool saveTemps; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -27,6 +27,7 @@ #include "ICF.h" #include "InputFiles.h" #include "InputSection.h" +#include "LinkerPropeller.h" #include "LinkerScript.h" #include "MarkLive.h" #include "OutputSections.h" @@ -1893,6 +1894,8 @@ for (InputSectionBase *s : f->getSections()) inputSections.push_back(cast(s)); + lld::propeller::doPropeller(); + llvm::erase_if(inputSections, [](InputSectionBase *s) { if (s->type == SHT_LLVM_SYMPART) { readSymbolPartitionSection(s); Index: lld/ELF/LinkerPropeller.h =================================================================== --- /dev/null +++ lld/ELF/LinkerPropeller.h @@ -0,0 +1,33 @@ +//===- LinkerPropeller.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the interface between LLD/ELF and Propeller. All interactions +// between LLD/ELF and propeller must be defined here. +// +// All dependencies on lld/ELF/*.h must happen in this file and +// LinkerPropeller.cpp. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_LINKER_PROPELLER_H +#define LLD_ELF_LINKER_PROPELLER_H + +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace propeller { +// Propeller interface to lld. +void doPropeller(); + +// Returns true if this is a BB symbol and shall be kept in the final binary's +// strtab. +bool isBBSymbolAndKeepIt(llvm::StringRef N); +} // namespace propeller +} // namespace lld + +#endif Index: lld/ELF/LinkerPropeller.cpp =================================================================== --- /dev/null +++ lld/ELF/LinkerPropeller.cpp @@ -0,0 +1,98 @@ +//===- LinkerPropeller.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the implementation of interface between LLD/ELF and Propeller. +// +// Current implementation first copies propeller parameters from +// lld::elf::Config instances into PropellerConfig. Then it transforms the +// vector into vector. +// +// "doPropeller" then passes PropellerConfig and vector to Propeller +// instance, and finally, after Propeller is done with its work, doPropeller +// passes the resulting symboll ordering back to lld. +// +// In summary, the dependencies of Propeller are: +// - a set of "lld::elf::InputFile"s. +// - command line arguments in lld::elf::Config +// - lld's being able to arrange section orders according to a vector of +// symbol names. +// +// All lld/ELF/Propeller/* only uses headers from lld/include, and llvm/include. +// +//===----------------------------------------------------------------------===// + +#include "LinkerPropeller.h" + +#include "Config.h" +#include "InputFiles.h" +#include "Propeller/PropellerConfig.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "lld/Common/PropellerCommon.h" + +#include +#include + +namespace lld { + +using elf::config; +using elf::InputFile; +using elf::objectFiles; + +namespace propeller { + +// Set up PropellerConfig from global lld config instnace. +static void setupConfig() { + propellerConfig.optPropeller = config->propeller; + propellerConfig.optLinkerOutputFile = config->outputFile; + +#define COPY_CONFIG(NAME) propellerConfig.opt##NAME = config->propeller##NAME + COPY_CONFIG(BackwardJumpDistance); + COPY_CONFIG(BackwardJumpWeight); + COPY_CONFIG(BBOrder); + COPY_CONFIG(ChainSplitThreshold); + COPY_CONFIG(DebugSymbols); + COPY_CONFIG(ClusterMergeSizeThreshold); + COPY_CONFIG(DumpCfgs); + COPY_CONFIG(DumpSymbolOrder); + COPY_CONFIG(FallthroughWeight); + COPY_CONFIG(ForwardJumpDistance); + COPY_CONFIG(ForwardJumpWeight); + COPY_CONFIG(KeepNamedSymbols); + COPY_CONFIG(Opts); + COPY_CONFIG(PrintStats); + COPY_CONFIG(ReorderBlocks); + COPY_CONFIG(ReorderFuncs); + COPY_CONFIG(SplitFuncs); + COPY_CONFIG(ReorderIP); +#undef COPY_CONFIG +} + +// Propeller framework entrance. +void doPropeller() { + if (config->propeller.empty()) + return; + + setupConfig(); + + // Propeller work starts here. In next CL. + + // prop = make(); + // ... + // ... + // ... +} + +bool isBBSymbolAndKeepIt(llvm::StringRef name) { + // Real work done in next CL. + return false; +} + +PropellerConfig propellerConfig; +} // namespace propeller +} // namespace lld Index: lld/ELF/Propeller/PropellerConfig.h =================================================================== --- /dev/null +++ lld/ELF/Propeller/PropellerConfig.h @@ -0,0 +1,45 @@ +//===-------------------- PropellerConfig.h -------------------------------===// +// + +#ifndef LLD_ELF_PROPELLER_CONFIG_H +#define LLD_ELF_PROPELLER_CONFIG_H + +#include "llvm/ADT/StringRef.h" + +#include +#include + +using llvm::StringRef; + +namespace lld { +namespace propeller { + +struct PropellerConfig { + uint64_t optBackwardJumpDistance; + double optBackwardJumpWeight; + std::vector optBBOrder; + uint64_t optChainSplitThreshold; + std::vector optDebugSymbols; + std::vector optDumpCfgs; + uint64_t optClusterMergeSizeThreshold; + StringRef optDumpSymbolOrder; + double optFallthroughWeight; + uint64_t optForwardJumpDistance; + double optForwardJumpWeight; + bool optKeepNamedSymbols; + StringRef optLinkerOutputFile; + std::vector optOpts; + bool optPrintStats; + StringRef optPropeller; + bool optReorderBlocks; + bool optReorderFuncs; + bool optSplitFuncs; + bool optReorderIP; +}; + +extern PropellerConfig propellerConfig; + +} // namespace propeller +} // namespace lld + +#endif Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -27,6 +27,7 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Endian.h" #include +#include namespace lld { namespace elf { @@ -585,6 +586,9 @@ unsigned getNumSymbols() const { return symbols.size() + 1; } size_t getSymbolIndex(Symbol *sym); ArrayRef getSymbols() const { return symbols; } + // SymName end address -> mapping. + // See details in SymbolTableBaseSection::addSymbol. + std::map> EndsMap; protected: void sortSymTabSymbols(); Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -24,6 +24,7 @@ #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/PropellerCommon.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "lld/Common/Version.h" @@ -2050,12 +2051,72 @@ *i++ = entry; } +// When calling addSymbol, we add each BB symbol in the reverse order of its +// index. For example, we add bb symbols in such a way: +// addSymbol("aaaa.BB.foo") +// addSymbol("aaa.BB.foo") +// addSymbol("aa.BB.foo") +// addSymbol("a.BB.foo") +// +// Note, the compiler makes sure the generated "aaaa.BB.foo", "aaa.BB.foo", etc +// in the object strtab are stored optimally - that only "aaaa.BB.foo" is stored +// in the strtab. +// [Symtab section] +// [symbol name] [strtab offset] +// aaaa.BB.foo 0 +// aaa.BB.foo 1 +// aa.BB.foo 2 +// a.BB.foo 3 +// [Strtab section] +// aaaa.BB.foo\0\0 +// +// "EndsMap" here stores the mapping from the end address of StringRef to a pair +// of . For example: +// From the object file we have the following StringRefs: +// aaaa.BB.foo [start=400, size=11] +// aaa.BB.foo [start=401, size=10] +// aa.BB.foo [start=402, size=9] +// a.BB.foo [start=403, size=8] +// Note: the end address for all the 4 symbols are 411. +// After adding "aaaa.BB.foo", we have the following StrTab and EndMap: +// +// Strtab: +// dummy\0 +// aaaa.BB.foo\0 occupies [6-17] +// \0 +// +// EndsMap: +// { +// 411:{6,11} +// } +// Then we try to add "aaa.BB.foo", which has the same "EndKey" 411, so we know +// we can resue the strtab entry from 6-17, and don't create a new strtabe entry +// we just reuse the existing one. void SymbolTableBaseSection::addSymbol(Symbol *b) { // Adding a local symbol to a .dynsym is a bug. assert(this->type != SHT_DYNSYM || !b->isLocal()); + assert(this->type != SHT_DYNSYM || !b->isLocal()); + StringRef SName = b->getName(); + uint64_t EndKey = (uint64_t)(SName.data() + SName.size()); + auto I = EndsMap.find(EndKey); + if (I != EndsMap.end()) { + uint64_t offset = I->second.first; + uint32_t size = I->second.second; + int64_t diff = size - SName.size(); + if (diff >= 0) { + uint64_t new_offset = offset + diff; + symbols.push_back({b, new_offset}); + return; + } + } bool hashIt = b->isLocal(); - symbols.push_back({b, strTabSec.addString(b->getName(), hashIt)}); + uint32_t offset = strTabSec.addString(b->getName(), hashIt); + symbols.push_back({b, offset}); + if (lld::propeller::SymbolEntry::isBBSymbol(b->getName())) { + EndsMap.emplace(std::piecewise_construct, std::forward_as_tuple(EndKey), + std::forward_as_tuple(offset, SName.size())); + } } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -11,6 +11,7 @@ #include "ARMErrataFix.h" #include "CallGraphSort.h" #include "Config.h" +#include "LinkerPropeller.h" #include "LinkerScript.h" #include "MapFile.h" #include "OutputSections.h" @@ -21,6 +22,7 @@ #include "Target.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" +#include "lld/Common/PropellerCommon.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" @@ -638,6 +640,9 @@ if (config->emitRelocs) return true; + if (lld::propeller::isBBSymbolAndKeepIt(sym.getName())) + return true; + // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. @@ -685,6 +690,8 @@ return; for (InputFile *file : objectFiles) { ObjFile *f = cast>(file); + std::vector localNonBBSymbols; + std::vector localBBSymbols; for (Symbol *b : f->getLocalSymbols()) { if (!b->isLocal()) fatal(toString(f) + @@ -698,8 +705,25 @@ continue; if (!shouldKeepInSymtab(*dr)) continue; - in.symTab->addSymbol(b); + + if (lld::propeller::SymbolEntry::isBBSymbol(b->getName())) + localBBSymbols.emplace_back(b); + else + localNonBBSymbols.emplace_back(b); } + + // We need to add bbsymbols in reverse order of their size. See details in + // SymbolTableBaseSection::addSymbol(Symbol *b). + llvm::sort(localBBSymbols, [](Symbol *A, Symbol *B) { + return A->getName().size() > B->getName().size(); + }); + + // Add BB symbols to SymTab first. + for (auto *S : localBBSymbols) + in.symTab->addSymbol(S); + + for (auto *S : localNonBBSymbols) + in.symTab->addSymbol(S); } } Index: lld/include/lld/Common/PropellerCommon.h =================================================================== --- /dev/null +++ lld/include/lld/Common/PropellerCommon.h @@ -0,0 +1,163 @@ +#ifndef LLD_ELF_PROPELLER_COMMON_H +#define LLD_ELF_PROPELLER_COMMON_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" + +#include + +using llvm::SmallVector; +using llvm::StringRef; + +namespace lld { +namespace propeller { + +static const char BASIC_BLOCK_SEPARATOR[] = ".BB."; +static const char *BASIC_BLOCK_UNIFIED_CHARACTERS = "arlL"; + +// This data structure is shared between lld propeller components and +// create_llvm_prof. In short, create_llvm_prof parses the binary, wraps all the +// symbol information using SymbolEntry class, whereas in Propeller, PropFile +// class parses the propeller profile (which is generated by create_llvm_prof), +// and wraps the symbol information in SymbolEntry. In other words, SymbolEntry +// is the interface shared between create_llvm_prof and Propeller. +// [create_llvm_prof refer to: +// https://github.com/shenhanc78/autofdo/tree/plo-dev] +struct SymbolEntry { + + enum BBTagTypeEnum : unsigned char { + BB_NONE = 0, // For functions. + BB_NORMAL, // Ordinary BB, 'a'. + BB_RETURN, // Return BB, 'r'. + BB_LANDING_PAD, // Landing pad BB, 'l'. + BB_RETURN_AND_LANDING_PAD // Landing pad and return BB, 'L'. + }; + + using AliasesTy = SmallVector; + + SymbolEntry(uint64_t O, const StringRef &N, AliasesTy &&As, uint64_t A, + uint64_t S, uint8_t T, bool BB = false, + SymbolEntry *FuncPtr = nullptr) + : Ordinal(O), Name(N), Aliases(As), Addr(A), Size(S), Type(T), BBTag(BB), + BBTagType(BB_NONE), HotTag(false), ContainingFunc(FuncPtr) {} + + // Unique index number across all symbols that participate linking. + uint64_t Ordinal; + // For a function symbol, it's the full name. For a bb symbol this is only the + // bbindex part, which is the number of "a"s before the ".bb." part. For + // example "8", "10", etc. Refer to Propfile::createFunctionSymbol and + // Propfile::createBasicBlockSymbol. + StringRef Name; + // Only valid for function (BBTag == false) symbols. And aliases[0] always + // equals to Name. For example, SymbolEntry.Name = "foo", SymbolEntry.Aliases + // = {"foo", "foo2", "foo3"}. + AliasesTy Aliases; + uint64_t Addr; + uint64_t Size; + uint8_t Type; // Of type: llvm::objet::SymbolRef::Type. + bool BBTag; // Whether this is a basic block section symbol. + BBTagTypeEnum BBTagType; + + bool HotTag; // Whether this symbol is listed in the propeller section. + // For BBTag symbols, this is the containing fuction pointer, for a normal + // function symbol, this points to itself. This is neverl nullptr. + SymbolEntry *ContainingFunc; + + bool isReturnBlock() const { + return BBTagType == BB_RETURN || BBTagType == BB_RETURN_AND_LANDING_PAD; + } + + bool isLandingPadBlock() const { + return BBTagType == BB_LANDING_PAD || + BBTagType == BB_RETURN_AND_LANDING_PAD; + } + + bool containsAddress(uint64_t A) const { + return Addr <= A && A < Addr + Size; + } + + bool containsAnotherSymbol(SymbolEntry *O) const { + if (O->Size == 0) { + // Note if O's size is 0, we allow O on the end boundary. For example, if + // foo.BB.4 is at address 0x10. foo is [0x0, 0x10), we then assume foo + // contains foo.BB.4. + return this->Addr <= O->Addr && O->Addr <= this->Addr + this->Size; + } + return containsAddress(O->Addr) && containsAddress(O->Addr + O->Size - 1); + } + + bool operator<(const SymbolEntry &Other) const { + return this->Ordinal < Other.Ordinal; + } + + bool isFunction() const { + return this->Type == llvm::object::SymbolRef::ST_Function; + } + + // Return true if this SymbolEntry is a containing function for BBName. For + // example, if BBName is given as "aa.BB.foo", and SymbolEntry.Name = "foo", + // then SymbolEntry.isFunctionForBBName(BBName) == true. BBNames are from ELF + // object files. + bool isFunctionForBBName(StringRef BBName) const { + auto A = BBName.split(BASIC_BLOCK_SEPARATOR); + if (A.second == Name) + return true; + for (auto N : Aliases) + if (A.second == N) + return true; + return false; + } + + // Return true if "SymName" is a BB symbol, e.g., in the form of + // "a.BB.funcname", and set FuncName to the part after ".BB.", BBIndex to + // before ".BB.", if the pointers are nonnull. + static bool isBBSymbol(const StringRef &SymName, + StringRef *FuncName = nullptr, + StringRef *BBIndex = nullptr) { + if (SymName.empty()) + return false; + auto R = SymName.split(BASIC_BLOCK_SEPARATOR); + if (R.second.empty()) + return false; + for (auto *I = R.first.bytes_begin(), *J = R.first.bytes_end(); I != J; ++I) + if (strchr(BASIC_BLOCK_UNIFIED_CHARACTERS, *I) == NULL) + return false; + if (FuncName) + *FuncName = R.second; + if (BBIndex) + *BBIndex = R.first; + return true; + } + + static BBTagTypeEnum toBBTagType(const char c) { + switch (c) { + case 'a': + return BB_NORMAL; + case 'r': + return BB_RETURN; + case 'l': + return BB_LANDING_PAD; + case 'L': + return BB_RETURN_AND_LANDING_PAD; + default:; + } + return BB_NONE; + } + + static const uint64_t INVALID_ADDRESS = uint64_t(-1); +}; + +struct SymbolEntryOrdinalLessComparator { + bool operator()(SymbolEntry *S1, SymbolEntry *S2) const { + if (!S1 && S2) + return true; + if ((S1 && !S2) || (!S1 && !S2)) + return false; + return S1->Ordinal < S2->Ordinal; + } +}; + +} // namespace propeller +} // namespace lld +#endif