Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -27,6 +27,7 @@ #include "ICF.h" #include "InputFiles.h" #include "InputSection.h" +#include "LinkerPropeller.h" #include "LinkerScript.h" #include "MarkLive.h" #include "OutputSections.h" @@ -852,6 +853,10 @@ config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false); config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common, !args.hasArg(OPT_relocatable)); + config->optimizeBBJumps = + args.hasFlag(OPT_optimize_bb_jumps, + OPT_no_optimize_bb_jumps, false); + config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); @@ -896,6 +901,12 @@ config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBasicBlockSections = + args.getLastArgValue(OPT_lto_basicblock_sections); + config->ltoUniqueBBSectionNames = + args.hasFlag(OPT_lto_unique_bb_section_names, + OPT_no_lto_unique_bb_section_names, + false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = @@ -922,12 +933,86 @@ args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); + + config->propeller = args.getLastArgValue(OPT_propeller); + + config->propellerKeepNamedSymbols = + args.hasFlag(OPT_propeller_keep_named_symbols, + OPT_no_propeller_keep_named_symbols, false); + + config->propellerDumpSymbolOrder = + args.getLastArgValue(OPT_propeller_dump_symbol_order); + + + config->propellerPrintStats = + args.hasFlag(OPT_propeller_print_stats, + OPT_no_propeller_print_stats, false); + + config->propellerDumpCfgs = args.getAllArgValues(OPT_propeller_dump_cfg); + + config->propellerDebugSymbols = + args.getAllArgValues(OPT_propeller_debug_symbol); + + config->propellerReorderBlocks = config->propellerReorderFuncs = + config->propellerSplitFuncs = !config->propeller.empty(); + + config->propellerFallthroughWeight = + args::getFloat(args, OPT_propeller_fallthrough_weight, 1.0); + config->propellerForwardJumpWeight = + args::getFloat(args, OPT_propeller_forward_jump_weight, 0.1); + config->propellerBackwardJumpWeight = + args::getFloat(args, OPT_propeller_backward_jump_weight, 0.1); + + config->propellerForwardJumpDistance = + args::getFloat(args, OPT_propeller_forward_jump_distance, 1024); + config->propellerBackwardJumpDistance = + args::getFloat(args, OPT_propeller_backward_jump_distance, 640); + config->propellerChainSplitThreshold = + args::getFloat(args, OPT_propeller_chain_split_threshold, 128); + + // Parse Propeller flags. + auto propellerOpts = args.getAllArgValues(OPT_propeller_opt); + bool splitFuncsExplicit = false; + for(auto& propellerOpt: propellerOpts){ + StringRef S = StringRef(propellerOpt); + if (S == "reorder-funcs"){ + config->propellerReorderFuncs = true; + } else if (S == "no-reorder-funcs") { + config->propellerReorderFuncs = false; + } else if (S == "reorder-blocks") { + config->propellerReorderBlocks = true; + } else if (S == "no-reorder-blocks") { + config->propellerReorderBlocks = false; + } else if (S == "split-funcs") { + config->propellerSplitFuncs = true; + splitFuncsExplicit = true; + } else if (S == "no-split-funcs") { + config->propellerSplitFuncs = false; + } else + error("unknown propeller option: " + S); + } + + if (!config->propeller.empty() && !config->propellerReorderBlocks) { + if (splitFuncsExplicit){ + error("propeller: Inconsistent combination of propeller optimizations" + " 'split-funcs' and 'no-reorder-blocks'."); + } else { + warn("propeller: no-reorder-blocks implicitly sets no-split-funcs."); + config->propellerSplitFuncs = false; + } + } + config->rpath = getRpath(args); config->relocatable = args.hasArg(OPT_relocatable); config->saveTemps = args.hasArg(OPT_save_temps); config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); + + config->shrinkJumpsAggressively = + args.hasFlag(OPT_shrink_jumps_aggressively, + OPT_no_shrink_jumps_aggressively, true); + config->singleRoRx = args.hasArg(OPT_no_rosegment); config->soName = args.getLastArgValue(OPT_soname); config->sortSection = getSortSection(args); @@ -1876,6 +1961,8 @@ for (InputSectionBase *s : f->getSections()) inputSections.push_back(cast(s)); + lld::propeller::doPropeller(); + llvm::erase_if(inputSections, [](InputSectionBase *s) { if (s->type == SHT_LLVM_SYMPART) { readSymbolPartitionSection(s); Index: lld/ELF/LinkerPropeller.h =================================================================== --- /dev/null +++ lld/ELF/LinkerPropeller.h @@ -0,0 +1,26 @@ +//===- LinkerPropeller.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the interface between LLD/ELF and Propeller. All interactions +// between LLD/ELF and propeller must be defined here. +// +// All dependencies on lld/ELF/*.h must happen in this file and +// LinkerPropeller.cpp. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_LINKER_PROPELLER_H +#define LLD_ELF_LINKER_PROPELLER_H + +namespace lld { +namespace propeller { +void doPropeller(); +} +} // namespace lld + +#endif Index: lld/ELF/LinkerPropeller.cpp =================================================================== --- /dev/null +++ lld/ELF/LinkerPropeller.cpp @@ -0,0 +1,96 @@ +//===- LinkerPropeller.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the implementation of interface between LLD/ELF and Propeller. +// +// Current implementation first copies propeller parameters from +// lld::elf::Config instances into PropellerConfig. Then it transforms the +// vector into vector. +// +// "doPropeller" then passes PropellerConfig and vector to Propeller +// instance, and finally, after Propeller is done with its work, doPropeller +// passes the resulting symboll ordering back to lld. +// +// In summary, the dependencies of Propeller are: +// - a set of "lld::elf::InputFile"s. +// - command line arguments in lld::elf::Config +// - lld's being able to arrange section orders according to a vector of +// symbol names. +// +// All lld/ELF/Propeller/* only uses headers from lld/include, and llvm/include. +// +//===----------------------------------------------------------------------===// + +#include "LinkerPropeller.h" + +#include "Config.h" +#include "InputFiles.h" +#include "Propeller/Propeller.h" +#include "Propeller/PropellerCFG.h" +#include "Propeller/PropellerConfig.h" + +#include "lld/Common/ErrorHandler.h" + +#include +#include + +namespace lld { + +using elf::config; +using elf::InputFile; +using elf::objectFiles; + +namespace propeller { + +static void setupConfig() { + propellerConfig.optPropeller = config->propeller; + propellerConfig.optLinkerOutputFile = config->outputFile; + +#define COPY_CONFIG(NAME) propellerConfig.opt##NAME = config->propeller##NAME + COPY_CONFIG(BackwardJumpDistance); + COPY_CONFIG(BackwardJumpWeight); + COPY_CONFIG(ChainSplitThreshold); + COPY_CONFIG(DebugSymbols); + COPY_CONFIG(DumpCfgs); + COPY_CONFIG(DumpSymbolOrder); + COPY_CONFIG(FallthroughWeight); + COPY_CONFIG(ForwardJumpDistance); + COPY_CONFIG(ForwardJumpWeight); + COPY_CONFIG(Opts); + COPY_CONFIG(PrintStats); + COPY_CONFIG(ReorderBlocks); + COPY_CONFIG(ReorderFuncs); + COPY_CONFIG(SplitFuncs); +#undef COPY_CONFIG +} + +void doPropeller() { + if (config->propeller.empty()) + return; + + setupConfig(); + Propeller propeller; + if (!propeller.checkTarget()) + warn("[Propeller]: Propeller skipped '" + config->outputFile + "'."); + + std::vector objectViews; + std::for_each(objectFiles.begin(), objectFiles.end(), + [&objectViews](const InputFile *inf) { + auto *ov = Propeller::createObjectView( + inf->getName(), objectViews.size() + 1, inf->mb); + if (ov) + objectViews.push_back(ov); + }); + if (propeller.processFiles(objectViews)) + config->symbolOrderingFile = propeller.genSymbolOrderingFile(); + else + error("Propeller stage failed."); +} + +} // namespace propeller +} // namespace lld Index: lld/ELF/Propeller/CMakeLists.txt =================================================================== --- /dev/null +++ lld/ELF/Propeller/CMakeLists.txt @@ -0,0 +1,19 @@ +find_package(Protobuf) +if( Protobuf_FOUND ) + add_compile_definitions(PROPELLER_PROTOBUF GOOGLE_PROTOBUF_NO_RTTI) + include_directories( ${Protobuf_INCLUDE_DIRS} ) + protobuf_generate_cpp(PROPELLER_PROTO_SRCS PROPELLER_PROTO_HDRS propeller_cfg.proto) + set(PROPELLER_PROTOBUF_LIBS ${Protobuf_LIBRARIES}) +endif() + +add_lld_library(lldPropeller + Propeller.cpp + PropellerBBReordering.cpp + PropellerCFG.cpp + PropellerFuncOrdering.cpp + PropellerProtobuf.cpp + ${PROPELLER_PROTO_SRCS} + + LINK_LIBS + ${PROPELLER_PROTOBUF_LIBS} +) Index: lld/ELF/Propeller/Propeller.h =================================================================== --- /dev/null +++ lld/ELF/Propeller/Propeller.h @@ -0,0 +1,357 @@ +//===------------------------ Propeller.h ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A. About the propeller framework. + +// Propeller is framework that, with the aid of embedded information +// in ELF obejct files, does optimization at the linking phase. +// +// B. How (current) propeller works. +// +// [The compiler part] +// +// Each basicblock is represented using a standalone elf section. +// +// [The profile part] +// +// A propeller-format profile ('propfile') is generated, which +// contains counters for jumps/calls/returns of each bb. +// +// [The lld part] +// +// LLD handles execution to propeller (Propeller.h is the interface +// that works with lld), which does a few things for each ELF object +// file: +// +// (1). generates control flow graph (CFG) for each function, this +// is possible because each basicblock section resides in a single +// elf section and we use relocation entries to determine jump/call +// targets of each basicblock section +// +// (2). parses propfile (the propeller-format profile), apply +// basicblock counters, edge counters to CFGs +// +// (3). passes information in (2) to BBReordering and +// FunctionOrdering pass, which generates list of optimal basicblock +// symbol ordering +// +// (4). Propeller feeds the list (generated in (3)) back to lld, and +// lld continues to work as usual. +// +// Some misconections: +// +// * Propeller only works on top of ThinLTO (regular PGO, csPGO, +// etc). +// +// The above statement is not correct. Propeller works on any binaries +// and brings performance improvement on top of the binary, regardless +// of how the binaries are optimized. +// +// * Propeller is an extension to -ffunction-sections and -fdata-sections. +// +// The above statement is not correct. Propeller is a *general* +// framework, it will be able to do things like reodering individual +// insns, remove/insert spill insns, etc. And we have plans for future +// optimizations based on propeller framework (not on bb sections). +// +// However, current Propeller's main optimization (function-splitting, +// function-reordering, basicblock-reordering) is done via +// -fbasicblock-sections, which is an extension of +// -ffunction-sections. +// +// ========================================================================= +// +// Propeller.h defines Propeller framework classes: +// +// Propfile - parses and wraps propeller profile +// +// Propeller - the main propeller framework class +// +// Propeller starts by checking if "-o" file matches propeller profile +// (Propeller::checkPropellerTarget), then it enters +// Propeller::processFiles(), which builds control flow graph (CFG) +// for each valid ELF object file (Propeller::processFile -> +// CFGBuilder::buildCFGs). +// +// After control flow graphs are build, Propeller starts parsing +// Propeller profile (the Propfile class). In this step, basicblock +// (BB) symbols are created, branch/call counters are read, and *very +// importantly*, the counters are applied to the CFG. For example, +// upon seeing two consecutive branch records in the propeller +// profile: a->b, c->d, we not only increment edge counters for a->b, +// c->d, we also walks from b->c, and increment basicblock and edge +// counters in between. This last step can only be done after we have +// a complete CFG for the function. +// +// The CFG information is stored in Propeller::CFGMap. +// +// After we have CFGs with complete counters for edges/bbs, we pass the +// information to optimization passes. For now, depending on +// propellerReorderFuncs, propellerReorderBlocks or propellerSplitFuncs, +// propeller generates a list of basicblock symbol orders and feed it the origin +// linker phase. This step is done in Propeller::genSymbolOrderingFile. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_PROPELLER_H +#define LLD_ELF_PROPELLER_H + +#include "PropellerProtobuf.h" + +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/LLVM.h" +#include "lld/Common/PropellerCommon.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/StringSaver.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace lld { +namespace propeller { + +class ControlFlowGraph; +class CFGEdge; +class CFGNode; +class ObjectView; +class Propeller; +struct PropellerConfig; + +// Propeller profile parser. +// +// A sample propeller profile is like below: +// +// Symbols +// 1 0 N.init/_init +// 2 0 N.plt +// ... +// ... +// 11 7c Nmain +// 12 f 11.1 +// 13 28 11.2 +// 14 b 11.3 +// 15 a 11.4 +// 16 65 N__libc_csu_init +// 17 2 N__libc_csu_fini +// 18 0 N.fini/_fini +// 19 5e N_ZN9assistantD2Ev/_ZN9assistantD1Ev +// Branches +// 10 12 232590 R +// 12 10 234842 C +// 12 14 143608 +// Fallthroughs +// 10 10 225131 +// !func1 +// !func2 +// +// The file consists of 4 parts, "Symbols", "Branches", "Fallthroughs" and +// Funclist. +// +// Each line in "Symbols" section contains the following field: +// index - in decimal, unique for each symbol, start from 1 +// size - in hex, without "0x" +// name - either starts with "N" or a digit. In the former case, +// everything after N is the symbol name. In the latter case, it's +// in the form of "a.b", "a" is a symbol index, "b" is the bb +// identification string (could be an index number). For the above +// example, name "11.2" means "main.bb.2", because "11" points to +// symbol main. Also note, symbols could have aliases, in such +// case, aliases are concatenated with the original name with a +// '/'. For example, symbol 19 contains 2 aliases. +// Note, the symbols listed are in strict non-decreasing address order. +// +// Each line in "Branches" section contains the following field: +// from - sym_index, in decimal +// to - sym_index, in decimal +// cnt - counter, in decimal +// C/R - a field indicate whether this is a function call or a return, +// could be empty if it's just a normal branch. +// +// Each line in "Fallthroughs" section contains exactly the same fields as in +// "Branches" section, except the "C" field. +// +// Funclist contains lines that starts with "!", and everything following that +// will be the function name that's to be consumed by compiler (for bb section +// generation purpose). +class Propfile { +public: + Propfile(Propeller &p, const std::string &pName) + : PropfileStrSaver(BPAllocator), PropfName(pName), PropfStream(), + Prop(p) {} + + // Check whether "outputFile" matches "@" directives in the propeller profile. + bool matchesOutputFileName(const StringRef outputFile); + + // Read "Symbols" sections in the propeller profile and create + // SymbolOrdinalMap and SymbolNameMap. + bool readSymbols(); + SymbolEntry *findSymbol(StringRef symName); + bool processProfile(); + + // For each function symbol line defintion, this function creates a + // SymbolEntry instance and places it in SymbolOrdinalMap and SymbolNameMap. + // Function symbol defintion line is like below. (See comments at the head of + // the file) + // 11 7c Nmain/alias1/alias2 + // ^^ ^^ ^^ + // Ordinal Size Name and aliases + SymbolEntry *createFunctionSymbol(uint64_t ordinal, const StringRef &name, + SymbolEntry::AliasesTy &&aliases, + uint64_t size) { + auto *sym = new SymbolEntry(ordinal, name, std::move(aliases), + SymbolEntry::INVALID_ADDRESS, size, + llvm::object::SymbolRef::ST_Function); + SymbolOrdinalMap.emplace(std::piecewise_construct, + std::forward_as_tuple(ordinal), + std::forward_as_tuple(sym)); + for (auto &a : sym->Aliases) + SymbolNameMap[a][""] = sym; + + if (sym->Aliases.size() > 1) + FunctionsWithAliases.push_back(sym); + + return sym; + } + + // For each bb symbol line defintion, this function creates a + // SymbolEntry instance and places it in SymbolOrdinalMap and SymbolNameMap. + // Function symbol defintion line is like below. (See comments at the head of + // the file) + // 12 f 11.1 + // ^^ ^ ^^^^ + // Ordinal Size func_ordinal.bb_index + SymbolEntry *createBasicBlockSymbol(uint64_t ordinal, SymbolEntry *function, + StringRef &bBIndex, uint64_t size) { + // bBIndex is of the form "1", "2", it's a stringref to integer. + assert(!function->BBTag && function->isFunction()); + auto *sym = + new SymbolEntry(ordinal, bBIndex, SymbolEntry::AliasesTy(), + SymbolEntry::INVALID_ADDRESS, size, + llvm::object::SymbolRef::ST_Unknown, true, function); + SymbolOrdinalMap.emplace(std::piecewise_construct, + std::forward_as_tuple(ordinal), + std::forward_as_tuple(sym)); + for (auto &a : function->Aliases) { + SymbolNameMap[a][bBIndex] = sym; + } + return sym; + } + + void reportParseError(StringRef msg) const; + + llvm::BumpPtrAllocator BPAllocator; + llvm::UniqueStringSaver PropfileStrSaver; + std::string PropfName; + std::ifstream PropfStream; + Propeller &Prop; + // Ordial -> SymbolEntry map. This also owns SymbolEntry instances. + std::map> SymbolOrdinalMap; + // SymbolNameMap is ordered in the following way: + // SymbolNameMap[foo][""] = functionSymbol; + // SymbolNameMap[foo]["1"] = fun.bb.1.Symbol; + // SymbolNameMap[foo]["2"] = fun.bb.2.Symbol; + // etc... + std::map> SymbolNameMap; + std::vector FunctionsWithAliases; + uint64_t LineNo; + char LineTag; +}; + +class Propeller { +public: + Propeller(); + ~Propeller(); + + // Returns true if linker output target matches propeller profile. + bool checkTarget(); + bool processFiles(std::vector &files); + void processFile(ObjectView *view); + CFGNode *findCfgNode(uint64_t symbolOrdinal); + void calculateNodeFreqs(); + std::vector genSymbolOrderingFile(); + void calculateLegacy(std::list &SymList, + std::list::iterator hotPlaceHolder, + std::list::iterator coldPlaceHolder); + template void forEachCfgRef(Visitor v) { + for (auto &p : CFGMap) + v(*(*(p.second.begin()))); + } + + bool dumpCfgs(); + + static ObjectView *createObjectView(const StringRef &vN, + const uint32_t ordinal, + const MemoryBufferRef &fR); + + std::vector> Views; + // Same named CFGs may exist in different object files (e.g. weak + // symbols.) We always choose symbols that appear earlier on the + // command line. Note: implementation is in the .cpp file, because + // ControlFlowGraph here is an incomplete type. + struct ObjectViewOrdinalComparator { + bool operator()(const ControlFlowGraph *a, const ControlFlowGraph *b) const; + }; + using CfgMapTy = + std::map>; + CfgMapTy CFGMap; + std::unique_ptr Propf; + uint32_t ProcessFailureCount; // Number of files that are not processed by + // Propeller. + // We call Propeller::processFile in parallel to create CFGs for + // each file, after the CFGs are created, each processFile thread + // then puts CFGs into Propeller::CFGMap (see above). Lock is used + // to guard this Propeller::CFGMap critical section. + std::mutex Lock; + +#ifdef PROPELLER_PROTOBUF + std::unique_ptr protobufPrinter; +#endif + +}; + +// When no "-propeller-keep-named-symbols" specified, we remove all BB symbols +// that are hot, and we keep only the first code BB symbol that starts the cold +// code region of the same function. See Below: +// Hot: +// foo +// foo.bb.1 <= delete +// foo.bb.2 <= delete +// bar +// bar.bb.1 <= delete +// bar.bb.3 <= delete +// Cold: +// foo.bb.3 +// foo.bb.4 <= delete +// foo.bb.5 <= delete +// bar.bb.2 +// bar.bb.4 <= delete +// bar.bb.5 <= delete +struct PropellerLegacy { + std::set BBSymbolsToKeep; + + bool shouldKeepBBSymbol(StringRef symName) { + if (!SymbolEntry::isBBSymbol(symName)) + return true; + return BBSymbolsToKeep.find(symName) != BBSymbolsToKeep.end(); + } +}; + +extern PropellerLegacy PropLeg; + +extern PropellerConfig propellerConfig; + +} // namespace propeller +} // namespace lld +#endif Index: lld/ELF/Propeller/Propeller.cpp =================================================================== --- /dev/null +++ lld/ELF/Propeller/Propeller.cpp @@ -0,0 +1,645 @@ +//===------------------------- Propeller.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Propeller.cpp is the entry point to Propeller framework. The main +// functionalities are: +// +// - parses propeller profile file, which contains profile information in the +// granularity of basicblocks. (a) +// +// - parses each ELF object file and generates CFG based on relocation +// information of each basicblock section. +// +// - maps profile in (a) onto (b) and get CFGs with profile counters (c) +// +// - calls optimization passes that uses (c). +// +//===----------------------------------------------------------------------===// + +#include "Propeller.h" +#include "PropellerBBReordering.h" +#include "PropellerConfig.h" +#include "PropellerCFG.h" +#include "PropellerFuncOrdering.h" + +#ifdef PROPELLER_PROTOBUF +#include "propeller_cfg.pb.h" +#endif +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + + +#include +#include +#include +#include // For std::accumulate. +#include +#include + +namespace lld { +namespace propeller { + +Propeller::Propeller() : Propf(nullptr) {} + +Propeller::~Propeller() {} + +// Read the "@" directives in the propeller file, compare it against "-o" +// filename, return true "-o" file name equals to one of the "@" directives. +bool Propfile::matchesOutputFileName(const StringRef outputFileName) { + int outputFileTagSeen = 0; + std::string line; + LineNo = 0; + while ((std::getline(PropfStream, line)).good()) { + ++LineNo; + if (line.empty()) + continue; + if (line[0] != '@') + break; + ++outputFileTagSeen; + if (StringRef(line.c_str() + 1) == outputFileName) + return true; + } + if (outputFileTagSeen) + return false; + // If no @outputFileName is specified, reset the stream and assume linker + // shall proceed propellering. + PropfStream.close(); + PropfStream.open(PropfName); + LineNo = 0; + return true; +} + +// Given a symbol name, return the corresponding SymbolEntry pointer. +// This is done by looking into table SymbolNameMap, which is a 2-dimension +// lookup table. The first dimension is the function name, the second one the +// bbindex. For example, symbol "111.bb.foo" is placed in +// SymbolNameMap["foo"]["3"], symbol "foo" is placed in +// SymbolNameMap["foo"][""]. +SymbolEntry *Propfile::findSymbol(StringRef symName) { + StringRef funcName; + StringRef bbIndex; + std::string tmpStr; + if (!SymbolEntry::isBBSymbol(symName, &funcName, &bbIndex)) { + funcName = symName; + bbIndex = ""; + } else { + // When symName is like "11111.bb.foo", set BBIndex to "5". + // "1111" -> "4". + tmpStr = std::to_string(bbIndex.size()); + bbIndex = StringRef(tmpStr); + } + auto L1 = SymbolNameMap.find(funcName); + if (L1 != SymbolNameMap.end()) { + auto L2 = L1->second.find(bbIndex); + if (L2 != L1->second.end()) + return L2->second; + } + return nullptr; +} + +void Propfile::reportParseError(const StringRef msg) const { + error(PropfName + ":" + std::to_string(LineNo) + ": " + msg); +} + +// Refer header file for detailed information about symbols section. +bool Propfile::readSymbols() { + std::string line; + // A list of bbsymbols that + // appears before its wrapping function. This should be rather rare. + std::list> bbSymbols; + while (std::getline(PropfStream, line).good()) { + ++LineNo; + if (line.empty()) + continue; + if (line[0] == '#' || line[0] == '!' || line[0] == '@') + continue; + if (line[0] == 'B' || line[0] == 'F') { + LineTag = line[0]; + break; // Done symbol section. + } + if (line[0] == 'S') { + LineTag = line[0]; + continue; + } + StringRef lineStrRef(line); + + uint64_t symOrdinal; + uint64_t symSize; + auto l1S = lineStrRef.split(' '); + auto l1 = l1S.first; + auto l2S = l1S.second.split(' '); + auto l2 = l2S.first; + auto ephemeralStr = l2S.second; + if (l1.getAsInteger(10, symOrdinal) /* means error happens */ || + symOrdinal == 0) { + reportParseError("invalid ordinal field"); + return false; + } + if (l2.getAsInteger(16, symSize)) { + reportParseError("invalid size field"); + return false; + } + if (ephemeralStr.empty()) { + reportParseError("invalid name field"); + return false; + } + if (ephemeralStr[0] == 'N') { // Function symbol? + // Save ephemeralStr for persistency across Propeller lifecycle. + StringRef savedNameStr = PropfileStrSaver.save(ephemeralStr.substr(1)); + SymbolEntry::AliasesTy sAliases; + savedNameStr.split(sAliases, '/'); + StringRef sName = sAliases[0]; + assert(SymbolOrdinalMap.find(symOrdinal) == SymbolOrdinalMap.end()); + createFunctionSymbol(symOrdinal, sName, std::move(sAliases), symSize); + } else { + // It's a bb symbol. + auto lineStrRef = ephemeralStr.split('.'); + uint64_t funcIndex; + if (lineStrRef.first.getAsInteger(10, funcIndex) || funcIndex == 0) { + reportParseError("invalid function index field"); + return false; + } + // Only save the index part, which is highly reusable. Note + // PropfileStrSaver is a UniqueStringSaver. + StringRef bbIndex = PropfileStrSaver.save(lineStrRef.second); + auto existingI = SymbolOrdinalMap.find(funcIndex); + if (existingI != SymbolOrdinalMap.end()) { + if (existingI->second->BBTag) { + reportParseError("index '" + std::to_string(funcIndex) + + "' is not a function index, but a bb index"); + return false; + } + createBasicBlockSymbol(symOrdinal, existingI->second.get(), bbIndex, + symSize); + } else + // A bb symbol appears earlier than its wrapping function, rare, but + // not impossible, rather play it safely. + bbSymbols.emplace_back(symOrdinal, funcIndex, bbIndex, symSize); + } + } // End of iterating all symbols. + + for (std::tuple &sym : bbSymbols) { + uint64_t symOrdinal; + uint64_t funcIndex; + uint64_t symSize; + StringRef bbIndex; + std::tie(symOrdinal, funcIndex, bbIndex, symSize) = sym; + auto existingI = SymbolOrdinalMap.find(funcIndex); + if (existingI == SymbolOrdinalMap.end()) { + reportParseError("function with index number '" + + std::to_string(funcIndex) + "' does not exist"); + return false; + } + createBasicBlockSymbol(symOrdinal, existingI->second.get(), bbIndex, + symSize); + } + return true; +} + +// Helper method to parse a branch or fallthrough record like below +// 10 12 232590 R +static bool parseBranchOrFallthroughLine(StringRef lineRef, + uint64_t *fromNodeIdx, + uint64_t *toNodeIdx, uint64_t *count, + char *type) { + auto getInt = [](const StringRef &S) -> uint64_t { + uint64_t r; + if (S.getAsInteger(10, r) /* string contains more than numbers */ + || r == 0) + return 0; + return r; + }; + auto s0 = lineRef.split(' '); + *fromNodeIdx = getInt(s0.first); + auto s1 = s0.second.split(' '); + *toNodeIdx = getInt(s1.first); + auto s2 = s1.second.split(' '); + *count = getInt(s2.first); + if (!*fromNodeIdx || !*toNodeIdx || !*count) + return false; + if (!s2.second.empty()) + if (s2.second == "C" || s2.second == "R") + *type = s2.second[0]; + else + return false; + else + *type = '\0'; + return true; +} + +// Read propeller profile. Refer header file for detail about propeller profile. +bool Propfile::processProfile() { + std::string line; + uint64_t branchCnt = 0; + uint64_t fallthroughCnt = 0; + while (std::getline(PropfStream, line).good()) { + ++LineNo; + if (line[0] == '#' || line[0] == '!') + continue; + if (line[0] == 'S' || line[0] == 'B' || line[0] == 'F') { + LineTag = line[0]; + continue; + } + if (LineTag != 'B' && LineTag != 'F') + break; + + StringRef L(line); // LineBuf is null-terminated. + uint64_t from, to, count; + char tag; + if (!parseBranchOrFallthroughLine(L, &from, &to, &count, &tag)) { + reportParseError("unrecognized line:\n" + L.str()); + return false; + } + CFGNode *fromN = Prop.findCfgNode(from); + CFGNode *toN = Prop.findCfgNode(to); + if (!fromN || !toN) + continue; + + if (LineTag == 'B') { + ++branchCnt; + if (fromN->CFG == toN->CFG) + fromN->CFG->mapBranch(fromN, toN, count, tag == 'C', tag == 'R'); + else + fromN->CFG->mapCallOut(fromN, toN, 0, count, tag == 'C', tag == 'R'); + } else { + ++fallthroughCnt; + // LineTag == 'F' + if (fromN->CFG == toN->CFG) + fromN->CFG->markPath(fromN, toN, count); + } + } + + if (!branchCnt) + warn("propeller processed 0 branch info"); + if (!fallthroughCnt) + warn("ropeller processed 0 fallthrough info"); + return true; +} + +// Parse each ELF file, create CFG and attach profile data to CFG. +void Propeller::processFile(ObjectView *view) { + if (view) { + if (CFGBuilder(*this, view).buildCFGs()) { + // Updating global data structure. + std::lock_guard lock(Lock); + Views.emplace_back(view); + for (std::pair> &P : + view->CFGs) { + auto result = CFGMap[P.first].emplace(P.second.get()); + (void)(result); + assert(result.second); + } + } else { + warn("skipped building CFG for '" + view->ViewName +"'"); + ++ProcessFailureCount; + } + } +} + +CFGNode *Propeller::findCfgNode(uint64_t symbolOrdinal) { + assert(Propf->SymbolOrdinalMap.find(symbolOrdinal) != + Propf->SymbolOrdinalMap.end()); + SymbolEntry *symbol = Propf->SymbolOrdinalMap[symbolOrdinal].get(); + if (!symbol) { + // This is an internal error, should not happen. + error(std::string("invalid symbol ordinal: " + + std::to_string(symbolOrdinal))); + return nullptr; + } + SymbolEntry *funcSym = symbol->BBTag ? symbol->ContainingFunc : symbol; + for (auto &funcAliasName : funcSym->Aliases) { + auto cfgLI = CFGMap.find(funcAliasName); + if (cfgLI == CFGMap.end()) + continue; + + // Objects (CfgLI->second) are sorted in the way they appear on the command + // line, which is the same as how linker chooses the weak symbol definition. + if (!symbol->BBTag) { + for (auto *CFG : cfgLI->second) + // Check CFG does have name "SymName". + for (auto &node : CFG->Nodes) + if (node->ShName == funcAliasName) + return node.get(); + } else { + uint32_t NumOnes; + // Compare the number of "a" in aaa...a.BB.funcname against integer + // NumOnes. + if (symbol->Name.getAsInteger(10, NumOnes) || !NumOnes) + warn("internal error, BB name is invalid: '" + symbol->Name.str()); + else + for (auto *CFG : cfgLI->second) + for (auto &node : CFG->Nodes) { + // Check CFG does have name "SymName". + auto t = node->ShName.find_first_of('.'); + if (t != std::string::npos && t == NumOnes) + return node.get(); + } + } + } + return nullptr; +} + +void Propeller::calculateNodeFreqs() { + auto sumEdgeWeights = [](std::vector &edges) -> uint64_t { + return std::accumulate( + edges.begin(), edges.end(), 0, + [](uint64_t pSum, const CFGEdge *edge) { return pSum + edge->Weight; }); + }; + for (auto &cfgP : CFGMap) { + auto &cfg = *cfgP.second.begin(); + if (cfg->Nodes.empty()) + continue; + bool Hot = false; + cfg->forEachNodeRef([&Hot, &sumEdgeWeights](CFGNode &node) { + uint64_t maxCallOut = + node.CallOuts.empty() + ? 0 + : (*std::max_element(node.CallOuts.begin(), node.CallOuts.end(), + [](const CFGEdge *E1, const CFGEdge *E2) { + return E1->Weight < E2->Weight; + })) + ->Weight; + node.Freq = std::max({sumEdgeWeights(node.Outs), sumEdgeWeights(node.Ins), + sumEdgeWeights(node.CallIns), maxCallOut}); + + Hot |= (node.Freq != 0); + }); + if (Hot && cfg->getEntryNode()->Freq == 0) + cfg->getEntryNode()->Freq = 1; + } +} + +// Returns true if linker output target matches propeller profile. +bool Propeller::checkTarget() { + if (propellerConfig.optPropeller.empty()) + return false; + std::string propellerFileName = propellerConfig.optPropeller.str(); + // Propfile takes ownership of FPtr. + Propf.reset(new Propfile(*this, propellerFileName)); + Propf->PropfStream.open(Propf->PropfName); + if (!Propf->PropfStream.good()) { + error(std::string("failed to open '") + propellerFileName + "'"); + return false; + } + return Propf->matchesOutputFileName( + llvm::sys::path::filename(propellerConfig.optLinkerOutputFile)); +} + +// Entrance of Propeller framework. This processes each elf input file in +// parallel and stores the result information. +bool Propeller::processFiles(std::vector &views) { + if (!Propf->readSymbols()) { + error(std::string("invalid propfile: '") + + propellerConfig.optPropeller.str() + "'"); + return false; + } + + ProcessFailureCount = 0; + llvm::parallel::for_each( + llvm::parallel::parallel_execution_policy(), views.begin(), views.end(), + std::bind(&Propeller::processFile, this, std::placeholders::_1)); + + if (ProcessFailureCount * 100 / views.size() >= 50) + warn("propeller failed to parse more than half the objects, " + "optimization would suffer"); + + /* Drop alias cfgs. */ + for (SymbolEntry *funcS : Propf->FunctionsWithAliases) { + ControlFlowGraph *primaryCfg = nullptr; + CfgMapTy::iterator primaryCfgMapEntry; + for (StringRef &AliasName : funcS->Aliases) { + auto cfgMapI = CFGMap.find(AliasName); + if (cfgMapI == CFGMap.end()) + continue; + + if (cfgMapI->second.empty()) + continue; + + if (!primaryCfg || + primaryCfg->Nodes.size() < (*cfgMapI->second.begin())->Nodes.size()) { + if (primaryCfg) + CFGMap.erase(primaryCfgMapEntry); + + primaryCfg = *cfgMapI->second.begin(); + primaryCfgMapEntry = cfgMapI; + } else + CFGMap.erase(cfgMapI); + } + } + + // Map profiles. + if (!Propf->processProfile()) + return false; + + calculateNodeFreqs(); + + dumpCfgs(); + + // Releasing all support data (symbol ordinal / name map, saved string refs, + // etc) before moving to reordering. + Propf.reset(nullptr); + return true; +} + +bool Propeller::dumpCfgs() { + if (propellerConfig.optDumpCfgs.empty()) return true; + + std::set cfgToDump(propellerConfig.optDumpCfgs.begin(), + propellerConfig.optDumpCfgs.end()); + llvm::SmallString<128> cfgOutputDir(propellerConfig.optLinkerOutputFile); + llvm::sys::path::remove_filename(cfgOutputDir); + for (auto &cfgName : cfgToDump) { + if (cfgName == "@") { +#ifdef PROPELLER_PROTOBUF + if (!protobufPrinter.get()) + protobufPrinter.reset(ProtobufPrinter::create( + Twine(propellerConfig.optLinkerOutputFile, ".cfg.pb.txt").str())); +#else + warn("dump to protobuf not supported"); +#endif + continue; + } + auto cfgLI = CFGMap.find(cfgName); + if (cfgLI == CFGMap.end()) { + warn("could not dump cfg for function '" + cfgName + + "' : no such function name exists"); + continue; + } + int Index = 0; + for (auto *CFG : cfgLI->second) + if (CFG->Name == cfgName) { + llvm::SmallString<128> cfgOutput = cfgOutputDir; + if (++Index <= 1) + llvm::sys::path::append(cfgOutput, (CFG->Name + ".dot")); + else + llvm::sys::path::append( + cfgOutput, + (CFG->Name + "." + StringRef(std::to_string(Index) + ".dot"))); + if (!CFG->writeAsDotGraph(StringRef(cfgOutput))) + warn("failed to dump CFG: '" + cfgName + "'"); + } + } + return true; +} + +ObjectView *Propeller::createObjectView(const StringRef &vN, + const uint32_t ordinal, + const MemoryBufferRef &fR) { + const char *FH = fR.getBufferStart(); + if (fR.getBufferSize() > 6 && FH[0] == 0x7f && FH[1] == 'E' && FH[2] == 'L' && + FH[3] == 'F') { + auto r = ObjectFile::createELFObjectFile(fR); + if (r) + return new ObjectView(*r, vN, ordinal, fR); + } + return nullptr; +} + +// Generate symbol ordering file according to selected optimization pass and +// feed it to the linker. +std::vector Propeller::genSymbolOrderingFile() { + int total_objs = 0; + int hot_objs = 0; + for (auto &Obj : Views) { + for (auto &CP : Obj->CFGs) { + auto &C = *(CP.second); + if (C.isHot()) { + ++hot_objs; + break; // process to next object. + } + } + ++total_objs; + } + + std::list cfgOrder; + if (propellerConfig.optReorderFuncs) { + CallChainClustering c3; + c3.init(*this); + auto cfgsReordered = c3.doOrder(cfgOrder); + (void)cfgsReordered; + } else { + forEachCfgRef( + [&cfgOrder](ControlFlowGraph &cfg) { cfgOrder.push_back(&cfg); }); + cfgOrder.sort([](const ControlFlowGraph *a, const ControlFlowGraph *b) { + const auto *aEntry = a->getEntryNode(); + const auto *bEntry = b->getEntryNode(); + return aEntry->MappedAddr < bEntry->MappedAddr; + }); + } + + std::list symbolList(1, "Hot"); + const auto hotPlaceHolder = symbolList.begin(); + const auto coldPlaceHolder = symbolList.end(); + if (propellerConfig.optPrintStats) + fprintf(stderr, "HISTOGRAM: Function.Name,BBs.all,BBs.hot\n"); + + for (auto *cfg : cfgOrder) { + if (propellerConfig.optPrintStats){ + unsigned hotBBs = 0; + unsigned allBBs = 0; + cfg->forEachNodeRef([&hotBBs, &allBBs] (CFGNode &node) { + if (node.Freq) + hotBBs++; + allBBs++; + }); + fprintf(stderr, "HISTOGRAM: %s,%u,%u\n", cfg->Name.str().c_str(), allBBs, + hotBBs); + } + if (cfg->isHot() && propellerConfig.optReorderBlocks) { + NodeChainBuilder(cfg, *this).doSplitOrder( + symbolList, hotPlaceHolder, + propellerConfig.optSplitFuncs ? coldPlaceHolder : hotPlaceHolder); + } else { + auto PlaceHolder = + propellerConfig.optSplitFuncs ? coldPlaceHolder : hotPlaceHolder; + cfg->forEachNodeRef([&symbolList, PlaceHolder](CFGNode &N) { + symbolList.insert(PlaceHolder, N.ShName); + }); +#ifdef PROPELLER_PROTOBUF + if (protobufPrinter && cfg->isHot()) + protobufPrinter->addCFG(*cfg); +#endif + } + } + +#ifdef PROPELLER_PROTOBUF + if (protobufPrinter) { + protobufPrinter->printCFGGroup(); + protobufPrinter.reset(); + } +#endif + + calculateLegacy(symbolList, hotPlaceHolder, coldPlaceHolder); + + if (!propellerConfig.optDumpSymbolOrder.empty()) { + FILE *fp = fopen(propellerConfig.optDumpSymbolOrder.str().c_str(), "w"); + if (!fp) + warn(StringRef("dump symbol order: failed to open ") + "'" + + propellerConfig.optDumpSymbolOrder + "'"); + else { + for (auto &sym : symbolList) + fprintf(fp, "%s\n", sym.str().c_str()); + fclose(fp); + llvm::outs() << "dumped symbol order file to: '" + << propellerConfig.optDumpSymbolOrder.str() << "\n"; + } + } + + symbolList.erase(hotPlaceHolder); + + return std::vector( + std::move_iterator::iterator>(symbolList.begin()), + std::move_iterator::iterator>(symbolList.end())); +} + +// Calculate a std::list of basicblock symbols that are to be kept in the final +// binary. For hot bb symbols, all bb symbols are to be dropped, because the +// content of all hot bb sections are grouped together with the origin function. +// For cold bb symbols, only the first bb symbols of the same function are kept. +void Propeller::calculateLegacy( + std::list &symList, + std::list::iterator hotPlaceHolder, + std::list::iterator coldPlaceHolder) { + // No function split or no cold symbols, all bb symbols shall be removed. + if (hotPlaceHolder == coldPlaceHolder) + return; + // For cold bb symbols that are split and placed in cold segements, + // only the first bb symbol of every function partition is kept. + StringRef LastFuncName = ""; + for (auto i = std::next(hotPlaceHolder), j = coldPlaceHolder; i != j; ++i) { + StringRef sName = *i; + StringRef fName; + if (SymbolEntry::isBBSymbol(sName, &fName)) { + if (LastFuncName.empty() || LastFuncName != fName) + PropLeg.BBSymbolsToKeep.insert(sName); + LastFuncName = fName; + } + } + return; +} + +bool Propeller::ObjectViewOrdinalComparator:: +operator()(const ControlFlowGraph *a, const ControlFlowGraph *b) const { + return a->View->Ordinal < b->View->Ordinal; +} + +PropellerLegacy PropLeg; + +PropellerConfig propellerConfig; + +} // namespace propeller +} // namespace lld Index: lld/ELF/Propeller/PropellerCFG.h =================================================================== --- /dev/null +++ lld/ELF/Propeller/PropellerCFG.h @@ -0,0 +1,232 @@ +//===-------------------- PropellerELFCfg.h -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Class definitions for propeller cfg, edge, nodes and CFGBuilder. +// +// The ObjectView class represents one ELF file. The CFGBuilder class builds +// cfg for each function and store it in ObjectView::CFGs, indexed by cfg name. +// +// CFGBuilder::buildCFGs works this way: +// - groups funcName, a.BB.funcName, aa.BB.funcName and alike into one set, +// for each set, passes the set to "CFGBuilder::buildCFG" +// - each element in the set is a section, we then know from its section +// relocations the connections to other sections. (a) +// - from (a), we build CFG. +// +// Three important functions in ControlFlowGraph: +// mapBranch - apply counter to edge A->B, where A, B belong to the same func +// +// mapCallOut - apply counter to edge A->B, where A, B belong to diff funcs +// +// markPath - apply counter to all nodes/edges betwee A and B, A and B belong +// to same func +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_PROPELLER_CFG_H +#define LLD_ELF_PROPELLER_CFG_H + +#include "Propeller.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" + +#include +#include +#include +#include + +using llvm::object::ObjectFile; +using llvm::object::section_iterator; +using llvm::object::SymbolRef; + +namespace lld { +namespace propeller { + +class ObjectView; +class CFGNode; +class ControlFlowGraph; + +// All instances of CFGEdge are owned by their CFG. +class CFGEdge { +public: + CFGNode *Src; + CFGNode *Sink; + uint64_t Weight; + // Whether it's an edge introduced by recursive-self-call. (Usually + // calls do not split basic blocks and do not introduce new edges.) + enum EdgeType : char { + INTRA_FUNC, + INTRA_RSC, // Recursive call. + INTRA_RSR, // Return from recursive call. + // Intra edge dynamically created because of indirect jump, etc. + INTRA_DYNA, + // Inter function jumps / calls. + INTER_FUNC_CALL, + INTER_FUNC_RETURN, + } Type = INTRA_FUNC; + +protected: + CFGEdge(CFGNode *N1, CFGNode *N2, EdgeType T) + : Src(N1), Sink(N2), Weight(0), Type(T) {} + + friend class ControlFlowGraph; +}; + +// All instances of CFGNode are owned by their CFG. +class CFGNode { +public: + uint64_t Shndx; + StringRef ShName; + uint64_t ShSize; + // Note, "MappedAddr"s are not real/virtual addresses, they are ordinals from + // the propeller file. However, ordinals from propeller do reflect the true + // orders of symbol address. + uint64_t MappedAddr; + uint64_t Freq; + ControlFlowGraph *CFG; + + std::vector Outs; // Intra function edges. + std::vector Ins; // Intra function edges. + std::vector CallOuts; // Callouts/returns to other functions. + std::vector CallIns; // Callins/returns from other functions. + + // Fallthrough edge, could be nullptr. And if not, FTEdge is in Outs. + CFGEdge *FTEdge; + + const static uint64_t InvalidAddress = -1; + + unsigned getBBIndex() const { + StringRef FName, BName; + if (SymbolEntry::isBBSymbol(ShName, &FName, &BName)) + return BName.size(); + return 0; + } + +private: + CFGNode(uint64_t _Shndx, const StringRef &_ShName, uint64_t _Size, + uint64_t _MappedAddr, ControlFlowGraph *_Cfg) + : Shndx(_Shndx), ShName(_ShName), ShSize(_Size), MappedAddr(_MappedAddr), + Freq(0), CFG(_Cfg), Outs(), Ins(), CallOuts(), CallIns(), + FTEdge(nullptr) {} + + friend class ControlFlowGraph; + friend class CFGBuilder; +}; + +class ControlFlowGraph { +public: + ObjectView *View; + StringRef Name; + uint64_t Size; + + // ControlFlowGraph assumes the ownership for all Nodes / Edges. + std::vector> Nodes; // Sorted by address. + std::vector> IntraEdges; + std::vector> InterEdges; + + ControlFlowGraph(ObjectView *V, const StringRef &N, uint64_t S) + : View(V), Name(N), Size(S) {} + + bool markPath(CFGNode *from, CFGNode *to, uint64_t cnt = 1); + void mapBranch(CFGNode *from, CFGNode *to, uint64_t cnt = 1, + bool isCall = false, bool isReturn = false); + void mapCallOut(CFGNode *from, CFGNode *to, uint64_t toAddr, uint64_t cnt = 1, + bool isCall = false, bool isReturn = false); + + CFGNode *getEntryNode() const { + assert(!Nodes.empty()); + return Nodes.begin()->get(); + } + + bool isHot() const { + if (Nodes.empty()) + return false; + return (getEntryNode()->Freq != 0); + } + + template void forEachNodeRef(Visitor V) { + for (auto &N : Nodes) + V(*N); + } + + bool writeAsDotGraph(StringRef cfgOutName); + +private: + // Create and take ownership. + CFGEdge *createEdge(CFGNode *from, CFGNode *to, + typename CFGEdge::EdgeType type); + + void emplaceEdge(CFGEdge *edge) { + if (edge->Type < CFGEdge::INTER_FUNC_CALL) + IntraEdges.emplace_back(edge); + else + InterEdges.emplace_back(edge); + } + + friend class CFGBuilder; +}; + +class CFGBuilder { +public: + Propeller *Prop; + ObjectView *View; + + uint32_t BB = 0; + uint32_t BBWoutAddr = 0; + uint32_t InvalidCFGs = 0; + + CFGBuilder(Propeller &prop, ObjectView *vw) : Prop(&prop), View(vw) {} + + // See implementaion comments in .cpp. + bool buildCFGs(); + +protected: + // See implementaion comments in .cpp. + void buildCFG(ControlFlowGraph &cfg, const SymbolRef &cfgSym, + std::map> &nodeMap); + + // See implementation comments in .cpp. + void calculateFallthroughEdges( + ControlFlowGraph &cfg, + std::map> &nodeMap); + + // Build a map from section "Idx" -> Section that relocates this + // section. Only used during building phase. + void + buildRelocationSectionMap(std::map &relocSecMap); + // Build a map from section "Idx" -> node representing "Idx". Only + // used during building phase. + void buildShndxNodeMap(std::map> &nodeMap, + std::map &shndxNodeMap); +}; + +// ObjectView is a structure that corresponds to a single ELF file. +class ObjectView { +public: + ObjectView(std::unique_ptr &vF, const StringRef &vN, + const uint32_t vO, const MemoryBufferRef &fR) + : ViewFile(std::move(vF)), ViewName(vN), Ordinal(vO), FileRef(fR), + CFGs() {} + + void EraseCfg(ControlFlowGraph *&cfgPtr); + + std::unique_ptr ViewFile; + StringRef ViewName; + const uint32_t Ordinal; + MemoryBufferRef FileRef; + + // Name -> ControlFlowGraph mapping. + std::map> CFGs; +}; + +std::ostream &operator<<(std::ostream &out, const CFGNode &node); +std::ostream &operator<<(std::ostream &out, const CFGEdge &edge); +std::ostream &operator<<(std::ostream &out, const ControlFlowGraph &cfg); +} // namespace propeller +} // namespace lld +#endif Index: lld/ELF/Propeller/PropellerCFG.cpp =================================================================== --- /dev/null +++ lld/ELF/Propeller/PropellerCFG.cpp @@ -0,0 +1,514 @@ +//===-------------------- PropellerELFCfg.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file creates cfg and maps propeller profile onto cfg nodes / edges. +// +//===----------------------------------------------------------------------===// +#include "PropellerCFG.h" + +#include "Propeller.h" + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using llvm::object::ObjectFile; +using llvm::object::RelocationRef; +using llvm::object::section_iterator; +using llvm::object::SectionRef; +using llvm::object::SymbolRef; + +namespace lld { +namespace propeller { + +bool ControlFlowGraph::writeAsDotGraph(StringRef cfgOutName) { + std::error_code ec; + llvm::raw_fd_ostream os(cfgOutName, ec, llvm::sys::fs::CD_CreateAlways); + if (ec.value()) { + warn("failed to open: '" + cfgOutName + "'"); + return false; + } + os << "digraph " << Name.str() << "{\n"; + forEachNodeRef([&os](CFGNode &n) { + os << n.getBBIndex() << " [size=\"" << n.ShSize << "\"];"; + }); + os << "\n"; + for (auto &e : IntraEdges) { + bool IsFTEdge = (e->Src->FTEdge == e.get()); + os << " " << e->Src->getBBIndex() << " -> " << e->Sink->getBBIndex() + << " [label=\"" << e->Weight + << "\", weight=" << (IsFTEdge ? "1.0" : "0.1") << "];\n"; + } + os << "}\n"; + llvm::outs() << "done dumping cfg '" << Name.str() << "' into '" + << cfgOutName.str() << "'\n"; + return true; +} + +// Create an edge for "from->to". +CFGEdge *ControlFlowGraph::createEdge(CFGNode *from, CFGNode *to, + typename CFGEdge::EdgeType type) { + CFGEdge *edge = new CFGEdge(from, to, type); + if (type < CFGEdge::EdgeType::INTER_FUNC_CALL) { + from->Outs.push_back(edge); + to->Ins.push_back(edge); + } else { + from->CallOuts.push_back(edge); + to->CallIns.push_back(edge); + } + // Take ownership of "edge", cfg is responsible for all edges. + emplaceEdge(edge); + return edge; +} + +// Apply counter (cnt) to all edges between node from -> to. Both nodes are from +// the same cfg. +bool ControlFlowGraph::markPath(CFGNode *from, CFGNode *to, uint64_t cnt) { + if (from == nullptr) { + /* If the from node is null, walk backward from the to node while only + * one INTRA_FUNC incoming edge is found. */ + assert(to != nullptr); + CFGNode *p = to; + do { + std::vector intraInEdges; + std::copy_if(p->Ins.begin(), p->Ins.end(), + std::back_inserter(intraInEdges), [this](CFGEdge *e) { + return e->Type == CFGEdge::EdgeType::INTRA_FUNC && + e->Sink != getEntryNode(); + }); + if (intraInEdges.size() == 1) + p = intraInEdges.front()->Src; + else + p = nullptr; + } while (p && p != to); + return true; + } + + if (to == nullptr) { + /* If the to node is null, walk forward from the from node while only + * one INTRA_FUNC outgoing edge is found. */ + assert(from != nullptr); + CFGNode *p = from; + do { + std::vector IntraOutEdges; + std::copy_if(p->Outs.begin(), p->Outs.end(), + std::back_inserter(IntraOutEdges), [this](CFGEdge *e) { + return e->Type == CFGEdge::EdgeType::INTRA_FUNC && + e->Sink != getEntryNode(); + }); + if (IntraOutEdges.size() == 1) + p = IntraOutEdges.front()->Sink; + else + p = nullptr; + } while (p && p != from); + return true; + } + + assert(from->CFG == to->CFG); + if (from == to) + return true; + CFGNode *p = from; + while (p && p != to) { + if (p->FTEdge) { + p->FTEdge->Weight += cnt; + p = p->FTEdge->Sink; + } else + p = nullptr; + } + if (!p) + return false; + return true; +} + +// Apply counter (cnt) to the edge from node from -> to. Both nodes are from the +// same cfg. +void ControlFlowGraph::mapBranch(CFGNode *from, CFGNode *to, uint64_t cnt, + bool isCall, bool isReturn) { + assert(from->CFG == to->CFG); + + for (auto &e : from->Outs) { + bool edgeTypeOk = true; + if (!isCall && !isReturn) + edgeTypeOk = + e->Type == CFGEdge::INTRA_FUNC || e->Type == CFGEdge::INTRA_DYNA; + else if (isCall) + edgeTypeOk = e->Type == CFGEdge::INTRA_RSC; + if (isReturn) + edgeTypeOk = e->Type == CFGEdge::INTRA_RSR; + if (edgeTypeOk && e->Sink == to) { + e->Weight += cnt; + return; + } + } + + CFGEdge::EdgeType type = CFGEdge::INTRA_DYNA; + if (isCall) + type = CFGEdge::INTRA_RSC; + else if (isReturn) + type = CFGEdge::INTRA_RSR; + + createEdge(from, to, type)->Weight += cnt; +} + +// Apply counter (cnt) for calls/returns/ that cross function boundaries. +void ControlFlowGraph::mapCallOut(CFGNode *from, CFGNode *to, uint64_t toAddr, + uint64_t cnt, bool isCall, bool isReturn) { + assert(from->CFG == this); + assert(from->CFG != to->CFG); + CFGEdge::EdgeType edgeType = CFGEdge::INTER_FUNC_RETURN; + if (isCall || + (toAddr && to->CFG->getEntryNode() == to && toAddr == to->MappedAddr)) + edgeType = CFGEdge::INTER_FUNC_CALL; + if (isReturn) + edgeType = CFGEdge::INTER_FUNC_RETURN; + for (auto &e : from->CallOuts) + if (e->Sink == to && e->Type == edgeType) { + e->Weight += cnt; + return; + } + createEdge(from, to, edgeType)->Weight += cnt; +} + +// This function creates CFGs for a single object file. +// +// Step 1 - scan all the symbols, for each function symbols, create an entry in +// "groups", below is what "groups" looks like: +// groups: { +// "foo": [], +// "bar": [], +// } +// +// Step 2 - scan all the symbols, for each BB symbol, find it's function's +// group, and insert the bb symbol into the group. For example, if we have BB +// symbols "a.BB.foo", "aa.BB.foo" and "a.BB.bar", after step 2, the groups +// structure looks like: +// groups: { +// "foo": ["a.BB.foo", "aa.BB.foo"], +// "bar": ["a.BB.bar"], +// } +// +// Step 3 - for each group, create CFG and tmpNodeMap, the latter is an ordered +// map of CFGNode (index key is Symbol Ordinal). For the above example, the +// following data structure is created: +// CFG[Name=foo], tmpNodeMap={1: CFGNode[BBIndex="1"], 2:CFGNode[BBIndex="2"]} +// CFG[Name=bar], tmpNodeMap={3: CFGNode[BBIndex="3"]} +// +// For each CFG and tmpNodeMap, call CFGBuilder::buildCFG(). +bool CFGBuilder::buildCFGs() { + auto symbols = View->ViewFile->symbols(); + std::map> groups; + for (const SymbolRef &sym : symbols) { + auto r = sym.getType(); + auto s = sym.getName(); + if (r && s && *r == SymbolRef::ST_Function) { + StringRef symName = *s; + auto ri = groups.emplace(std::piecewise_construct, + std::forward_as_tuple(symName), + std::forward_as_tuple(1, sym)); + (void)(ri.second); + assert(ri.second); + } + } + + // Now we have a map of function names, group "x.bb.funcname". + for (const SymbolRef &sym : symbols) { + // All bb symbols are local, upon seeing the first global, exit. + if ((sym.getFlags() & SymbolRef::SF_Global) != 0) + break; + auto nameOrErr = sym.getName(); + if (!nameOrErr) + continue; + StringRef sName = *nameOrErr; + StringRef fName; + if (SymbolEntry::isBBSymbol(sName, &fName, nullptr)) { + auto L = groups.find(fName); + if (L != groups.end()) + L->second.push_back(sym); + } + } + + for (auto &i : groups) { + assert(i.second.size() >= 1); + std::map> tmpNodeMap; + SymbolRef cfgSym = *(i.second.begin()); + StringRef cfgName = i.first; + std::unique_ptr cfg( + new ControlFlowGraph(View, cfgName, 0)); + for (SymbolRef sym : i.second) { + auto symNameE = sym.getName(); + auto sectionIE = sym.getSection(); + if (symNameE && sectionIE && + (*sectionIE) != sym.getObject()->section_end()) { + StringRef symName = *symNameE; + uint64_t symShndx = (*sectionIE)->getIndex(); + // Note here: BB symbols only carry size information when + // -fbasicblock-section=all. Objects built with + // -fbasicblock-section=labels do not have size information + // for BB symbols. + uint64_t symSize = llvm::object::ELFSymbolRef(sym).getSize(); + // Drop bb sections with no code + if (!symSize) + continue; + auto *sE = Prop->Propf->findSymbol(symName); + if (sE) { + if (tmpNodeMap.find(sE->Ordinal) != tmpNodeMap.end()) { + error("Internal error checking cfg map."); + return false; + } + tmpNodeMap.emplace( + std::piecewise_construct, std::forward_as_tuple(sE->Ordinal), + std::forward_as_tuple(new CFGNode(symShndx, symName, sE->Size, + sE->Ordinal, cfg.get()))); + continue; + } + // Otherwise fallthrough to ditch cfg & tmpNodeMap. + } + tmpNodeMap.clear(); + cfg.reset(nullptr); + break; + } + + if (tmpNodeMap.empty()) + cfg.reset(nullptr); + + if (!cfg) + continue; // to next cfg group. + + uint32_t groupShndx = 0; + for (auto &T : tmpNodeMap) { + if (groupShndx != 0 && T.second->Shndx == groupShndx) { + cfg.reset(nullptr); + tmpNodeMap.clear(); + warn("basicblock sections must not have same section index, this is " + "usually caused by -fbasicblock-sections=labels. " + "Use -fbasicblock-sections=list/all instead"); + return false; + } + groupShndx = T.second->Shndx; + } + + if (cfg) { + buildCFG(*cfg, cfgSym, tmpNodeMap); + View->CFGs.emplace(cfg->Name, std::move(cfg)); + } + } // Enf of processing all groups. + return true; +} + +// Build map: TextSection -> It's Relocation Section. +// ELF file only contains link from Relocation Section -> It's text section. +void CFGBuilder::buildRelocationSectionMap( + std::map &relocationSectionMap) { + for (section_iterator i = View->ViewFile->section_begin(), + J = View->ViewFile->section_end(); + i != J; ++i) { + SectionRef secRef = *i; + if (llvm::object::ELFSectionRef(secRef).getType() == llvm::ELF::SHT_RELA) { + Expected rr = secRef.getRelocatedSection(); + if (rr) { + section_iterator &r = *rr; + assert(r != J); + relocationSectionMap.emplace(r->getIndex(), *i); + } + } + } +} + +// Build map: basicblock section index -> basicblock section node. +void CFGBuilder::buildShndxNodeMap( + std::map> &tmpNodeMap, + std::map &shndxNodeMap) { + for (auto &nodeL : tmpNodeMap) { + auto &node = nodeL.second; + auto insertResult = shndxNodeMap.emplace(node->Shndx, node.get()); + (void)(insertResult); + assert(insertResult.second); + } +} + +// Build CFG for a single function. +// +// For each BB sections of a single function, we iterate all the relocation +// entries, and for relocations that targets another BB in the same function, +// we create an edge between these 2 BBs. +void CFGBuilder::buildCFG( + ControlFlowGraph &cfg, const SymbolRef &cfgSym, + std::map> &tmpNodeMap) { + std::map shndxNodeMap; + buildShndxNodeMap(tmpNodeMap, shndxNodeMap); + + std::map relocationSectionMap; + buildRelocationSectionMap(relocationSectionMap); + + // Recursive call edges. + std::list rscEdges; + // Iterate all bb symbols. + for (auto &nPair : tmpNodeMap) { + CFGNode *srcNode = nPair.second.get(); + // For each bb section, we find its rela sections. + auto relaSecRefI = relocationSectionMap.find(srcNode->Shndx); + if (relaSecRefI == relocationSectionMap.end()) + continue; + + // Iterate all rela entries. + for (const RelocationRef &rela : relaSecRefI->second->relocations()) { + SymbolRef rSym = *(rela.getSymbol()); + bool isRSC = (cfgSym == rSym); + + // All bb section symbols are local symbols. + if (!isRSC && + ((rSym.getFlags() & llvm::object::BasicSymbolRef::SF_Global) != 0)) + continue; + + auto sectionIE = rSym.getSection(); + if (!sectionIE) + continue; + // Now we have the Shndx of one relocation target. + uint64_t symShndx((*sectionIE)->getIndex()); + CFGNode *targetNode{nullptr}; + // Check to see if Shndx is another BB section within the same function. + auto result = shndxNodeMap.find(symShndx); + if (result != shndxNodeMap.end()) { + targetNode = result->second; + if (targetNode) { + // If so, we create the edge. + CFGEdge *e = + cfg.createEdge(srcNode, targetNode, + isRSC ? CFGEdge::INTRA_RSC : CFGEdge::INTRA_FUNC); + // If it's a recursive call, record it. + if (isRSC) + rscEdges.push_back(e); + } + } + } + } + + // For each recursive call, we create a recursive-self-return edges for all + // exit edges. In the following example, create an edge bb5->bb3 FuncA: + // bb1: <---+ + // ... | + // bb2: | + // ... | r(ecursie)-s(elf)-C(all) edge + // bb3: | + // ... | + // call FuncA --- + + // xxx yyy <---+ + // ... | + // bb4: | + // ... | r(ecursie)-s(elf)-r(eturn) edge + // bb5: | + // ... | + // ret ----------+ + for (auto *rEdge : rscEdges) { + for (auto &nPair : tmpNodeMap) { + auto &n = nPair.second; + if (n->Outs.size() == 0 || + (n->Outs.size() == 1 && + (*n->Outs.begin())->Type == CFGEdge::INTRA_RSC)) { + // Now "n" is the exit node. + cfg.createEdge(n.get(), rEdge->Src, CFGEdge::INTRA_RSR); + } + } + } + calculateFallthroughEdges(cfg, tmpNodeMap); + + // Transfer nodes ownership to cfg and destroy tmpNodeMap. + for (auto &pair : tmpNodeMap) { + cfg.Nodes.emplace_back(std::move(pair.second)); + pair.second.reset(nullptr); + } + tmpNodeMap.clear(); + + // Set cfg size and re-calculate size of the entry basicblock, which is + // initially the size of the whole function. + cfg.Size = cfg.getEntryNode()->ShSize; + cfg.forEachNodeRef([&cfg](CFGNode &n) { + if (&n != cfg.getEntryNode()) + cfg.getEntryNode()->ShSize -= n.ShSize; + }); +} + +// Calculate fallthroughs. Edge p->q is fallthrough if p & q are adjacent (e.g. +// no other bbs are between p & q), and there is a NORMAL edge from p->Q. +// +// tmpNodeMap groups nodes according to their beginning address: +// addr1: [Node1] +// addr2: [Node2] +// addr3: [Node3] +// addr4: [Node4] +// And addr1 <= addr2 <= addr3 <= addr4. +void CFGBuilder::calculateFallthroughEdges( + ControlFlowGraph &cfg, + std::map> &tmpNodeMap) { + + auto setupFallthrough = [&cfg](CFGNode *n1, CFGNode *n2) { + for (auto *e : n1->Outs) + if (e->Type == CFGEdge::INTRA_FUNC && e->Sink == n2) { + n1->FTEdge = e; + return; + } + if (n1->ShSize == 0) + // An empty section always fallthrough to the next adjacent section. + n1->FTEdge = cfg.createEdge(n1, n2, CFGEdge::INTRA_FUNC); + }; + + for (auto p = tmpNodeMap.begin(), q = std::next(p), e = tmpNodeMap.end(); + q != e; ++p, ++q) + setupFallthrough(p->second.get(), q->second.get()); +} + +std::ostream &operator<<(std::ostream &out, const CFGNode &node) { + out << "[" + << (node.ShName == node.CFG->Name + ? "Entry" + : std::to_string(node.ShName.size() - node.CFG->Name.size() - 4)) + << "]" + << " [size=" << std::noshowbase << std::dec << node.ShSize << ", " + << " addr=" << std::showbase << std::hex << node.MappedAddr << ", " + << " frequency=" << std::showbase << std::dec << node.Freq << ", " + << " shndx=" << std::noshowbase << std::dec << node.Shndx << "]"; + return out; +} + +std::ostream &operator<<(std::ostream &out, const CFGEdge &edge) { + static const char *TypeStr[] = {"", " (*RSC*)", " (*RSR*)", " (*DYNA*)"}; + out << "edge: " << *edge.Src << " -> " << *edge.Sink << " [" << std::setw(12) + << std::setfill('0') << std::noshowbase << std::dec << edge.Weight << "]" + << TypeStr[edge.Type]; + return out; +} + +std::ostream &operator<<(std::ostream &out, const ControlFlowGraph &cfg) { + out << "cfg: '" << cfg.View->ViewName.str() << ":" << cfg.Name.str() + << "', size=" << std::noshowbase << std::dec << cfg.Size << std::endl; + for (auto &n : cfg.Nodes) { + auto &node = *n; + out << " node: " << node << std::endl; + for (auto &edge : node.Outs) { + out << " " << *edge << (edge == node.FTEdge ? " (*FT*)" : "") + << std::endl; + } + for (auto &edge : node.CallOuts) { + out << " Calls: '" << edge->Sink->ShName.str() + << "': " << std::noshowbase << std::dec << edge->Weight << std::endl; + } + } + out << std::endl; + return out; +} + +} // namespace propeller +} // namespace lld Index: lld/ELF/Propeller/PropellerConfig.h =================================================================== --- /dev/null +++ lld/ELF/Propeller/PropellerConfig.h @@ -0,0 +1,41 @@ +//===-------------------- PropellerConfig.h -------------------------------===// +// + +#ifndef LLD_ELF_PROPELLER_CONFIG_H +#define LLD_ELF_PROPELLER_CONFIG_H + +#include "llvm/ADT/StringRef.h" + +#include +#include + +using llvm::StringRef; + +namespace lld { +namespace propeller { + +struct PropellerConfig { + uint64_t optBackwardJumpDistance; + double optBackwardJumpWeight; + uint64_t optChainSplitThreshold; + std::vector optDebugSymbols; + std::vector optDumpCfgs; + StringRef optDumpSymbolOrder; + double optFallthroughWeight; + uint64_t optForwardJumpDistance; + double optForwardJumpWeight; + StringRef optLinkerOutputFile; + std::vector optOpts; + bool optPrintStats; + StringRef optPropeller; + bool optReorderBlocks; + bool optReorderFuncs; + bool optSplitFuncs; +}; + +extern PropellerConfig propellerConfig; + +} // namespace propeller +} // namespace lld + +#endif Index: lld/ELF/Propeller/propeller_cfg.proto =================================================================== --- /dev/null +++ lld/ELF/Propeller/propeller_cfg.proto @@ -0,0 +1,62 @@ +syntax = "proto2"; + +package llvm.plo.cfg; + +// Edge between basic blocks. +// Next Available: 4. +message Edge { + // Index of the source basic block. + optional int32 source = 1; + + // Index of the target basic block. + optional int32 target = 2; + + // Frequency count of the jump from profile. + optional int32 profile_count = 3; +} + +// Basic block in cfg. +// Next Available: 6. +message BasicBlock { + // 0-based index that distinguishes between different basic blocks. + optional int32 index = 1; + + // Size of the basic block. + optional int32 size = 2; + + // Frequency count of the basic block from profile, defined as + // max(sum(incoming_edges), sum(outgoing_edges)). + optional int32 profile_count = 3; + + // Edges coming into this basic block. + repeated Edge incoming_edges = 4; + + // Edges going out of this basic block. + repeated Edge outgoing_edges = 5; +} + +// Control flow graph where basic blocks are vertices and jumps are edges. +// Next Available: 6. +message CFG { + // Name of the function. + optional string name = 1; + + // Size of the function. + optional int32 size = 2; + + // Basic blocks in the CFG. + // The sequence of the repeated field matches the after-propeller layout. + repeated BasicBlock basic_blocks = 3; + + // The index of the entry block in the CFG. + optional int32 entry_block = 4; + + // Object name of the function belongs to. + optional string object_name = 5; +} + +// A group of CFGs. +// Next Available: 2. +message CFGGroup { + repeated CFG cfg_list = 1; +} Index: lld/include/lld/Common/PropellerCommon.h =================================================================== --- /dev/null +++ lld/include/lld/Common/PropellerCommon.h @@ -0,0 +1,117 @@ +#ifndef LLD_ELF_PROPELLER_COMMON_H +#define LLD_ELF_PROPELLER_COMMON_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" + +#include + +using llvm::SmallVector; +using llvm::StringRef; + +namespace lld { +namespace propeller { + +static const char BASIC_BLOCK_SEPARATOR[] = ".BB."; +static const char BASIC_BLOCK_UNIFIED_CHARACTER = 'a'; + +// This data structure is shared between lld propeller components and +// create_llvm_prof. In short, create_llvm_prof parses the binary, wraps all the +// symbol information using SymbolEntry class, whereas in Propeller, PropFile +// class parses the propeller profile (which is generated by create_llvm_prof), +// and wraps the symbol information in SymbolEntry. In other words, SymbolEntry +// is the interface shared between create_llvm_prof and Propeller. +// [create_llvm_prof refer to: +// https://github.com/shenhanc78/autofdo/tree/plo-dev] +struct SymbolEntry { + + using AliasesTy = SmallVector; + SymbolEntry(uint64_t O, const StringRef &N, AliasesTy &&As, uint64_t A, + uint64_t S, uint8_t T, bool BB = false, + SymbolEntry *FuncPtr = nullptr) + : Ordinal(O), Name(N), Aliases(As), Addr(A), Size(S), Type(T), BBTag(BB), + ContainingFunc(FuncPtr) {} + + // Unique index number across all symbols that participate linking. + uint64_t Ordinal; + // For a function symbol, it's the full name. For a bb symbol this is only the + // bbindex part, which is the number of "a"s before the ".bb." part. For + // example "8", "10", etc. Refer to Propfile::createFunctionSymbol and + // Propfile::createBasicBlockSymbol. + StringRef Name; + // Only valid for function (BBTag == false) symbols. And aliases[0] always + // equals to Name. For example, SymbolEntry.Name = "foo", SymbolEntry.Aliases + // = {"foo", "foo2", "foo3"}. + AliasesTy Aliases; + uint64_t Addr; + uint64_t Size; + uint8_t Type; // Of type: llvm::objet::SymbolRef::Type. + bool BBTag; // Whether this is a basic block section symbol. + // For BBTag symbols, this is the containing fuction pointer, for a normal + // function symbol, this points to itself. This is neverl nullptr. + SymbolEntry *ContainingFunc; + + bool containsAddress(uint64_t A) const { + return Addr <= A && A < Addr + Size; + } + + bool containsAnotherSymbol(SymbolEntry *O) const { + if (O->Size == 0) { + // Note if O's size is 0, we allow O on the end boundary. For example, if + // foo.BB.4 is at address 0x10. foo is [0x0, 0x10), we then assume foo + // contains foo.BB.4. + return this->Addr <= O->Addr && O->Addr <= this->Addr + this->Size; + } + return containsAddress(O->Addr) && containsAddress(O->Addr + O->Size - 1); + } + + bool operator<(const SymbolEntry &Other) const { + return this->Ordinal < Other.Ordinal; + } + + bool isFunction() const { + return this->Type == llvm::object::SymbolRef::ST_Function; + } + + // Return true if this SymbolEntry is a containing function for BBName. For + // example, if BBName is given as "aa.BB.foo", and SymbolEntry.Name = "foo", + // then SymbolEntry.isFunctionForBBName(BBName) == true. BBNames are from ELF + // object files. + bool isFunctionForBBName(StringRef BBName) const { + auto A = BBName.split(BASIC_BLOCK_SEPARATOR); + if (A.second == Name) + return true; + for (auto N : Aliases) + if (A.second == N) + return true; + return false; + } + + // Return true if "SymName" is a BB symbol, e.g., in the form of + // "a.BB.funcname", and set FuncName to the part after ".BB.", BBIndex to + // before ".BB.", if the pointers are nonnull. + static bool isBBSymbol(const StringRef &SymName, + StringRef *FuncName = nullptr, + StringRef *BBIndex = nullptr) { + if (SymName.empty()) + return false; + auto R = SymName.split(BASIC_BLOCK_SEPARATOR); + if (R.second.empty()) + return false; + for (auto *I = R.first.bytes_begin(), *J = R.first.bytes_end(); I != J; ++I) + if (*I != BASIC_BLOCK_UNIFIED_CHARACTER) + return false; + if (FuncName) + *FuncName = R.second; + if (BBIndex) + *BBIndex = R.first; + return true; + } + + static const uint64_t INVALID_ADDRESS = uint64_t(-1); +}; + +} // namespace propeller +} // namespace lld +#endif Index: lld/test/ELF/propeller/Inputs/bad-propeller-1.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/bad-propeller-1.data @@ -0,0 +1,2 @@ +Symbols +a 2b N_start Index: lld/test/ELF/propeller/Inputs/bad-propeller-2.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/bad-propeller-2.data @@ -0,0 +1,2 @@ +Symbols +1 2b Index: lld/test/ELF/propeller/Inputs/bad-propeller-3.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/bad-propeller-3.data @@ -0,0 +1,2 @@ +Symbols +1 2b main Index: lld/test/ELF/propeller/Inputs/bad-propeller-4.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/bad-propeller-4.data @@ -0,0 +1,3 @@ +Symbols +1 2b Nmain +2 10 3.1 Index: lld/test/ELF/propeller/Inputs/bad-propeller-5.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/bad-propeller-5.data @@ -0,0 +1,4 @@ +Symbols +1 2b Nmain +2 10 1.1 +3 10 2.1 Index: lld/test/ELF/propeller/Inputs/propeller-2.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/propeller-2.data @@ -0,0 +1,34 @@ +Symbols +1 2b N_start +2 1 N_dl_relocate_static_pie +3 0 Nderegister_tm_clones +4 0 Nregister_tm_clones +5 0 N__do_global_dtors_aux +6 0 Nframe_dummy +7 57 Nthis_is_very_code +8 2c Ncompute_flag +9 c2 Nmain +10 b 9.5 +11 12 9.1 +12 24 9.2 +13 2a 9.3 +14 1f 9.4 +15 8 9.6 +16 5d N__libc_csu_init +17 1 N__libc_csu_fini +18 0 N_init +19 0 N_fini +Branches +8 11 293285 R +11 8 295501 C +11 13 181981 +13 10 283908 +Fallthroughs +8 8 286906 +10 11 281228 +11 11 179986 +11 13 94381 +13 13 178963 +!compute_flag +!main +! Index: lld/test/ELF/propeller/Inputs/propeller-3.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/propeller-3.data @@ -0,0 +1 @@ +@not_a_valid_file_name_957349kfhskg591yjkvhsd Index: lld/test/ELF/propeller/Inputs/propeller.data =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/propeller.data @@ -0,0 +1,31 @@ +Symbols +1 2b N_start +2 1 N_dl_relocate_static_pie +3 0 Nderegister_tm_clones +4 0 Nregister_tm_clones +5 0 N__do_global_dtors_aux +6 0 Nframe_dummy +7 2c Ncompute_flag +8 7b Nmain +9 b 8.3 +10 12 8.1 +11 26 8.2 +12 8 8.4 +13 5d N__libc_csu_init +14 1 N__libc_csu_fini +15 0 N_init +16 0 N_fini +Branches +7 10 282779 R +10 7 289388 C +10 9 165714 +11 9 110451 +Fallthroughs +7 7 280473 +9 10 273908 +10 10 160338 +10 11 106891 +!_start +!compute_flag +!main +! Index: lld/test/ELF/propeller/Inputs/sample.c =================================================================== --- /dev/null +++ lld/test/ELF/propeller/Inputs/sample.c @@ -0,0 +1,28 @@ +/* sample.c */ +volatile int count; + +__attribute__((noinline)) +int compute_flag(int i) +{ + if (i % 10 < 4) /* ... in 40% of the iterations */ + return i + 1; + return 0; +} + +int main(void) +{ + int i; + int flag; + volatile double x = 1212121212, y = 121212; /* Avoid compiler optimizing away */ + + for (i = 0; i < 2000000000; i++) { + flag = compute_flag(i); + + /* Some other code */ + count++; + + if (flag) + x += x / y + y / x; /* Execute expensive division if flag is set */ + } + return 0; +} Index: lld/test/ELF/propeller/propeller-bad-profile-1.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bad-profile-1.s @@ -0,0 +1,7 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: not ld.lld -propeller=%S/Inputs/bad-propeller-1.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: invalid ordinal field Index: lld/test/ELF/propeller/propeller-bad-profile-2.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bad-profile-2.s @@ -0,0 +1,7 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: not ld.lld -propeller=%S/Inputs/bad-propeller-2.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: invalid name field Index: lld/test/ELF/propeller/propeller-bad-profile-3.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bad-profile-3.s @@ -0,0 +1,7 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: not ld.lld -propeller=%S/Inputs/bad-propeller-3.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: invalid function index field Index: lld/test/ELF/propeller/propeller-bad-profile-4.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bad-profile-4.s @@ -0,0 +1,7 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: not ld.lld -propeller=%S/Inputs/bad-propeller-4.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: function with index number '3' does not exist Index: lld/test/ELF/propeller/propeller-bad-profile-5.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bad-profile-5.s @@ -0,0 +1,8 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: not ld.lld -propeller=%S/Inputs/bad-propeller-5.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: index '2' is not a function index, but a bb index + Index: lld/test/ELF/propeller/propeller-bbsections-dump.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-bbsections-dump.s @@ -0,0 +1,61 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller.data -propeller-dump-cfg=main %t.o -o %t.out + +# RUN: cat %T/main.dot | FileCheck %s --check-prefix=CFG +# CFG: 0 [size="48"];3 [size="11"];1 [size="18"];2 [size="38"];4 [size="8"]; +# CFG: 0 -> 1 +# CFG: 3 -> 4 +# CFG: 3 -> 1 [label="273908" +# CFG: 1 -> 3 [label="165714" +# CFG: 1 -> 2 [label="106891" +# CFG: 2 -> 3 [label="110451" + +.text + .section .text.compute_flag,"ax",@progbits + .globl compute_flag + .type compute_flag,@function +compute_flag: + movslq %edi, %rcx + retq +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .type main,@function +main: + pushq %rbx + jmp a.BB.main # 0 -> 1 + + .section .text.main,"ax",@progbits,unique,1 +aaa.BB.main: + je aaaa.BB.main # 3 -> 4 + jmp a.BB.main # 3 -> 1 +.Ltmp0: + .size aaa.BB.main, .Ltmp0-aaa.BB.main + + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + je aaa.BB.main # 1 -> 3 + jmp aa.BB.main # 1 -> 2 +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + jmp aaa.BB.main # 2 -> 3 +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + + .section .text.main,"ax",@progbits,unique,4 +aaaa.BB.main: + retq +.Ltmp3: + .size aaaa.BB.main, .Ltmp3-aaaa.BB.main + + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main Index: lld/test/ELF/propeller/propeller-compressed-strtab-lto.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-compressed-strtab-lto.s @@ -0,0 +1,62 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: clang -c -flto=thin -fbasicblock-sections=all -O2 %S/Inputs/sample.c -o %t.o +# RUN: file %t.o | FileCheck %s --check-prefix=FILETYPE +# FILETYPE: LLVM IR bitcode + +# RUN: clang -fuse-ld=lld -fbasicblock-sections=all -Wl,-lto-basicblock-sections=all -Wl,-propeller=%S/Inputs/propeller.data -Wl,-propeller-keep-named-symbols -O2 %t.o -o %t.out + +## We shall have "a.BB.main", "aa.BB.main", "aaa.BB.main", "aaaa.BB.main" in the symbol table. +# RUN: llvm-nm %t.out | grep -cF ".BB.main" | FileCheck %s --check-prefix=SYM_COUNT +# SYM_COUNT: 4 +## But we only have "aaaa.BB.main" in .strtab, all others are compressed. +# RUN: llvm-readelf --string-dump=.strtab %t.out | grep -cF ".BB.main" | FileCheck %s --check-prefix=STRTAB_COUNT +# STRTAB_COUNT: 1 + +.text + .section .text.compute_flag,"ax",@progbits + .globl compute_flag + .type compute_flag,@function +compute_flag: + movslq %edi, %rcx + retq +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .type main,@function +main: + pushq %rbx + jmp a.BB.main # 0 -> 1 + + .section .text.main,"ax",@progbits,unique,1 +aaa.BB.main: + je aaaa.BB.main # 3 -> 4 + jmp a.BB.main # 3 -> 1 +.Ltmp0: + .size aaa.BB.main, .Ltmp0-aaa.BB.main + + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + je aaa.BB.main # 1 -> 3 + jmp aa.BB.main # 1 -> 2 +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + jmp aaa.BB.main # 2 -> 3 +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + + .section .text.main,"ax",@progbits,unique,4 +aaaa.BB.main: + retq +.Ltmp3: + .size aaaa.BB.main, .Ltmp3-aaaa.BB.main + + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main Index: lld/test/ELF/propeller/propeller-compressed-strtab.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-compressed-strtab.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test "a.BB.main", "aa.BB.main", "aaa.BB.main" and "aaaa.BB.main" are saved +## in strtab in compressed form. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller.data %t.o -propeller-keep-named-symbols -o %t.out + +## We shall have "a.BB.main", "aa.BB.main", "aaa.BB.main", "aaaa.BB.main" in the symbol table. +# RUN: llvm-nm %t.out | grep -cF ".BB.main" | FileCheck %s --check-prefix=SYM_COUNT +# SYM_COUNT: 4 +## But we only have "aaaa.BB.main" in .strtab, all others are compressed. +# RUN: llvm-readobj --string-dump=.strtab %t.out | grep -cF ".BB.main" | FileCheck %s --check-prefix=STRTAB_COUNT +# STRTAB_COUNT: 1 + +.text + .section .text.compute_flag,"ax",@progbits + .globl compute_flag + .type compute_flag,@function +compute_flag: + movslq %edi, %rcx + retq +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .type main,@function +main: + pushq %rbx + jmp a.BB.main # 0 -> 1 + + .section .text.main,"ax",@progbits,unique,1 +aaa.BB.main: + je aaaa.BB.main # 3 -> 4 + jmp a.BB.main # 3 -> 1 +.Ltmp0: + .size aaa.BB.main, .Ltmp0-aaa.BB.main + + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + je aaa.BB.main # 1 -> 3 + jmp aa.BB.main # 1 -> 2 +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + jmp aaa.BB.main # 2 -> 3 +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + + .section .text.main,"ax",@progbits,unique,4 +aaaa.BB.main: + retq +.Ltmp3: + .size aaaa.BB.main, .Ltmp3-aaaa.BB.main + + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main Index: lld/test/ELF/propeller/propeller-error-on-bblabels.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-error-on-bblabels.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test propeller fails if it tries to process an object that is built with "-fbasicblock-sections=labels". + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: warning: basicblock sections must not have same section index + + .text + .globl compute_flag + .p2align 4, 0x90 + .type compute_flag,@function +compute_flag: +# %bb.0: + movslq %edi, %rcx + retq +.Ltmp0: + .size .BB.compute_flag, .Ltmp0-.BB.compute_flag +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + .globl main + .p2align 4, 0x90 + .type main,@function +main: +# %bb.0: + pushq %rbx + subq $16, %rsp + jmp a.BB.main +.Ltmp1: + .size .BB.main, .Ltmp1-.BB.main + .p2align 4, 0x90 +aaa.BB.main: + addl $1, %ebx + cmpl $2000000000, %ebx + je aaaa.BB.main +.Ltmp2: + .size aaa.BB.main, .Ltmp2-aaa.BB.main +a.BB.main: + movl %ebx, %edi + callq compute_flag + addl $1, count(%rip) + testl %eax, %eax + je aaa.BB.main +.Ltmp3: + .size a.BB.main, .Ltmp3-a.BB.main +aa.BB.main: + movsd (%rsp), %xmm0 + jmp aaa.BB.main +.Ltmp4: + .size aa.BB.main, .Ltmp4-aa.BB.main +aaaa.BB.main: + xorl %eax, %eax + popq %rbx + retq +.Ltmp5: + .size aaaa.BB.main, .Ltmp5-aaaa.BB.main +.Lfunc_end1: + .size main, .Lfunc_end1-main + .type count,@object + .comm count,4,4 Index: lld/test/ELF/propeller/propeller-keep-bb-symbols.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-keep-bb-symbols.s @@ -0,0 +1,117 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller-2.data -propeller-dump-symbol-order=%t2.symorder %t.o -o %t.out +# RUN: cat %t2.symorder | FileCheck %s --check-prefix=SYM_ORDER +# SYM_ORDER: Hot +# SYM_ORDER: aaaa.BB.main +# SYM_ORDER: aaaaaa.BB.main + +## Symbol "aaaaaa.BB.main" is removed because it is adjacent to aaaa.BB.main. +# RUN: { llvm-nm %t.out | grep -cF "aaaaaa.BB.main" 2>&1 || true ; } | FileCheck %s --check-prefix=SYM_COUNT +# SYM_COUNT: 0 + +## Symbol "aaaa.BB.main" is kept, because it starts a new cold code section for function "main". +# Run: llvm-nm -S %t.out | grep -cF "aaaa.BB.main" | FileCheck %s --check-prefix=SYM_COUNT1 +# SYM_COUNT1: 1 + + .text + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI0_0: + .quad 4640642756656496640 + .zero 8 + .section .text.this_is_very_cold,"ax",@progbits + .globl this_is_very_cold + .p2align 4, 0x90 + .type this_is_very_cold,@function +this_is_very_cold: +# %bb.0: + movabsq $4749492747822432256, %rax + retq +.Lfunc_end0: + .size this_is_very_cold, .Lfunc_end0-this_is_very_cold + .section .text.compute_flag,"ax",@progbits + + .globl compute_flag + .p2align 4, 0x90 + .type compute_flag,@function +compute_flag: +# %bb.0: + movslq %edi, %rcx + cmpl $4, %edx + cmovll %ecx, %eax + retq +.Lfunc_end1: + .size compute_flag, .Lfunc_end1-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .p2align 4, 0x90 + .type main,@function +main: +# %bb.0: + pushq %rbx + subq $16, %rsp + movabsq $4742870812841738240, %rax # imm = 0x41D20FE01F000000 + jmp a.BB.main + .section .text.main,"ax",@progbits,unique,1 + .p2align 4, 0x90 + +aaaaa.BB.main: + addl $1, %ebx + cmpl $2000000000, %ebx + je aaaaaa.BB.main + jmp a.BB.main +.Ltmp0: + .size aaaaa.BB.main, .Ltmp0-aaaaa.BB.main + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + + movl %ebx, %edi + callq compute_flag + addl $1, count(%rip) + testl %eax, %eax + je aaa.BB.main + jmp aa.BB.main +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + + movsd (%rsp), %xmm0 + jmp aaa.BB.main +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + .section .text.main,"ax",@progbits,unique,4 +aaa.BB.main: + + movslq count(%rip), %rax + cmpl $183, %eax + jne aaaaa.BB.main + jmp aaaa.BB.main +.Ltmp3: + .size aaa.BB.main, .Ltmp3-aaa.BB.main + .section .text.main,"ax",@progbits,unique,5 +aaaa.BB.main: + + xorps %xmm0, %xmm0 + cvtsi2sdl count(%rip), %xmm0 + callq this_is_very_cold + addsd (%rsp), %xmm0 + movsd %xmm0, (%rsp) + jmp aaaaa.BB.main +.Ltmp4: + .size aaaa.BB.main, .Ltmp4-aaaa.BB.main + .section .text.main,"ax",@progbits,unique,6 +aaaaaa.BB.main: + xorl %eax, %eax + addq $16, %rsp + popq %rbx + retq +.Ltmp5: + .size aaaaaa.BB.main, .Ltmp5-aaaaaa.BB.main + .section .text.main,"ax",@progbits +.Lfunc_end2: + .size main, .Lfunc_end2-main + + .type count,@object + .comm count,4,4 Index: lld/test/ELF/propeller/propeller-lto-bbsections-dump.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-lto-bbsections-dump.s @@ -0,0 +1,63 @@ +# REQUIRES: x86 +## Test control flow graph is created. + +# RUN: clang -c -flto=thin -fbasicblock-sections=all -O2 %S/Inputs/sample.c -o %t.o +# RUN: file %t.o | FileCheck %s --check-prefix=FILE_TYPE +# FILE_TYPE: LLVM IR bitcode +# RUN: clang -fuse-ld=lld -fbasicblock-sections=all -Wl,-lto-basicblock-sections=all -Wl,-propeller-dump-cfg=main -Wl,-propeller=%S/Inputs/propeller.data -O2 %t.o -o %t.out +# RUN: cat %T/main.dot | FileCheck %s --check-prefix=LTO_CFG + +# LTO_CFG: 0 [size="48"];3 [size="11"];1 [size="18"];2 [size="38"];4 [size="8"]; +# LTO_CFG: 0 -> 1 +# LTO_CFG: 3 -> 4 +# LTO_CFG: 3 -> 1 [label="273908" +# LTO_CFG: 1 -> 3 [label="165714" +# LTO_CFG: 1 -> 2 [label="106891" +# LTO_CFG: 2 -> 3 [label="110451" + +.text + .section .text.compute_flag,"ax",@progbits + .globl compute_flag + .type compute_flag,@function +compute_flag: + movslq %edi, %rcx + retq +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .type main,@function +main: + pushq %rbx + jmp a.BB.main # 0 -> 1 + + .section .text.main,"ax",@progbits,unique,1 +aaa.BB.main: + je aaaa.BB.main # 3 -> 4 + jmp a.BB.main # 3 -> 1 +.Ltmp0: + .size aaa.BB.main, .Ltmp0-aaa.BB.main + + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + je aaa.BB.main # 1 -> 3 + jmp aa.BB.main # 1 -> 2 +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + jmp aaa.BB.main # 2 -> 3 +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + + .section .text.main,"ax",@progbits,unique,4 +aaaa.BB.main: + retq +.Ltmp3: + .size aaaa.BB.main, .Ltmp3-aaaa.BB.main + + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main Index: lld/test/ELF/propeller/propeller-opt-all-combinations.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-opt-all-combinations.s @@ -0,0 +1,295 @@ +# REQUIRES: x86 +## Basic propeller tests. +## This test exercises all combinations of propeller options (reorder-funcs, reorder-blocks, +## and split-funcs) on four functions. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# RUN: ld.lld %t.o -optimize-bb-jumps -o %t.out +# RUN: llvm-objdump -d %t.out| FileCheck %s --check-prefix=BEFORE + +# BEFORE: Disassembly of section .text: +# BEFORE-EMPTY: +# BEFORE-NEXT: foo: +# BEFORE-NEXT: xorb %al, 0 +# BEFORE-NEXT: int3 +# BEFORE-EMPTY: +# BEFORE-NEXT: bar: +# BEFORE-NEXT: xorb %al, 1 +# BEFORE-NEXT: je 9 +# BEFORE-EMPTY: +# BEFORE-NEXT: a.BB.bar: +# BEFORE-NEXT: xorb %al, 2 +# BEFORE-NEXT: jmp 7 +# BEFORE-EMPTY: +# BEFORE-NEXT: aa.BB.bar: +# BEFORE-NEXT: xorb %al, 3 +# BEFORE-EMPTY: +# BEFORE-NEXT: aaa.BB.bar: +# BEFORE-NEXT: xorb %al, 4 +# BEFORE-EMPTY: +# BEFORE-NEXT: baz: +# BEFORE-NEXT: xorb %al, 5 +# BEFORE-NEXT: int3 +# BEFORE-EMPTY: +# BEFORE-NEXT: qux: +# BEFORE-NEXT: xorb %al, 6 +# + +## Create a propeller profile for four functions foo, bar, baz, and qux based on the cfg below: +## +## +## bar +## / \ +## / \100 +## / \ +## 100 v v +## foo <----- a.BB.bar aa.BB.bar baz qux ---+ +## \ / ^ | +## \ /100 | 1 | +## \ / +-----+ +## v v +## aaa.BB.bar +## + +# RUN: echo "Symbols" > %t_prof.propeller +# RUN: echo "1 8 Nfoo" >> %t_prof.propeller +# RUN: echo "2 20 Nbar" >> %t_prof.propeller +# RUN: echo "3 9 2.1" >> %t_prof.propeller +# RUN: echo "4 7 2.2" >> %t_prof.propeller +# RUN: echo "5 7 2.3" >> %t_prof.propeller +# RUN: echo "6 8 Nbaz" >> %t_prof.propeller +# RUN: echo "7 7 Nqux" >> %t_prof.propeller +# RUN: echo "Branches" >> %t_prof.propeller +# RUN: echo "2 4 100" >> %t_prof.propeller +# RUN: echo "4 1 100 C" >> %t_prof.propeller +# RUN: echo "7 7 10 C" >> %t_prof.propeller +# RUN: echo "Fallthroughs" >> %t_prof.propeller +# RUN: echo "4 5 100" >> %t_prof.propeller + +# RUN: ld.lld %t.o -optimize-bb-jumps -propeller=%t_prof.propeller --verbose -propeller-keep-named-symbols -o %t.propeller.reorder.out +# RUN: llvm-objdump -d %t.propeller.reorder.out| FileCheck %s --check-prefix=REORDER + +# REORDER: Disassembly of section .text: +# REORDER-EMPTY: +# REORDER-NEXT: bar: +# REORDER-NEXT: xorb %al, 1 +# REORDER-NEXT: jne 30 +# REORDER-EMPTY: +# REORDER-NEXT: aa.BB.bar: +# REORDER-NEXT: xorb %al, 3 +# REORDER-EMPTY: +# REORDER-NEXT: aaa.BB.bar: +# REORDER-NEXT: xorb %al, 4 +# REORDER-NEXT: int3 +# REORDER-EMPTY: +# REORDER-NEXT: foo: +# REORDER-NEXT: xorb %al, 0 +# REORDER-NEXT: int3 +# REORDER-EMPTY: +# REORDER-NEXT: qux: +# REORDER-NEXT: xorb %al, 6 +# REORDER-EMPTY: +# REORDER-NEXT: a.BB.bar: +# REORDER-NEXT: xorb %al, 2 +# REORDER-NEXT: jmp -32 +# REORDER-EMPTY: +# REORDER-NEXT: baz: +# REORDER-NEXT: xorb %al, 5 +# + +# RUN: ld.lld %t.o -optimize-bb-jumps -propeller=%t_prof.propeller -propeller-opt=no-reorder-blocks -propeller-keep-named-symbols -o %t.propeller.noreorderblocks.out 2>&1 | FileCheck %s --check-prefixes=WARN,IMPLICITNOSPLIT +# WARN-NOT: warning: +# IMPLICITNOSPLIT: warning: propeller: no-reorder-blocks implicitly sets no-split-funcs. +# +# RUN: llvm-objdump -d %t.propeller.noreorderblocks.out| FileCheck %s --check-prefix=NO_REORDER_BB +# +# NO_REORDER_BB: Disassembly of section .text: +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: bar: +# NO_REORDER_BB-NEXT: xorb %al, 1 +# NO_REORDER_BB-NEXT: je 9 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: a.BB.bar: +# NO_REORDER_BB-NEXT: xorb %al, 2 +# NO_REORDER_BB-NEXT: jmp 7 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: aa.BB.bar: +# NO_REORDER_BB-NEXT: xorb %al, 3 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: aaa.BB.bar: +# NO_REORDER_BB-NEXT: xorb %al, 4 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: foo: +# NO_REORDER_BB-NEXT: xorb %al, 0 +# NO_REORDER_BB-NEXT: int3 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: qux: +# NO_REORDER_BB-NEXT: xorb %al, 6 +# NO_REORDER_BB-NEXT: int3 +# NO_REORDER_BB-EMPTY: +# NO_REORDER_BB-NEXT: baz: +# NO_REORDER_BB-NEXT: xorb %al, 5 + +# RUN: ld.lld %t.o -optimize-bb-jumps -propeller=%t_prof.propeller -propeller-opt=no-reorder-funcs -propeller-keep-named-symbols -o %t.propeller.noreorderfuncs.out +# RUN: llvm-objdump -d %t.propeller.noreorderfuncs.out| FileCheck %s --check-prefix=NO_REORDER_FUNC +# +# NO_REORDER_FUNC: Disassembly of section .text: +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: foo: +# NO_REORDER_FUNC-NEXT: xorb %al, 0 +# NO_REORDER_FUNC-NEXT: int3 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: bar: +# NO_REORDER_FUNC-NEXT: xorb %al, 1 +# NO_REORDER_FUNC-NEXT: jne 22 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: aa.BB.bar: +# NO_REORDER_FUNC-NEXT: xorb %al, 3 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: aaa.BB.bar: +# NO_REORDER_FUNC-NEXT: xorb %al, 4 +# NO_REORDER_FUNC-NEXT: int3 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: qux: +# NO_REORDER_FUNC-NEXT: xorb %al, 6 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: a.BB.bar: +# NO_REORDER_FUNC-NEXT: xorb %al, 2 +# NO_REORDER_FUNC-NEXT: jmp -24 +# NO_REORDER_FUNC-EMPTY: +# NO_REORDER_FUNC-NEXT: baz: +# NO_REORDER_FUNC-NEXT: xorb %al, 5 +# + +# RUN: ld.lld %t.o -optimize-bb-jumps -propeller=%t_prof.propeller -propeller-opt=no-split-funcs -propeller-keep-named-symbols -o %t.propeller.nosplitfuncs.out +# RUN: llvm-objdump -d %t.propeller.nosplitfuncs.out| FileCheck %s --check-prefix=NO_SPLIT_FUNC +# +# NO_SPLIT_FUNC: Disassembly of section .text: +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: bar: +# NO_SPLIT_FUNC-NEXT: xorb %al, 1 +# NO_SPLIT_FUNC-NEXT: jne 14 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: aa.BB.bar: +# NO_SPLIT_FUNC-NEXT: xorb %al, 3 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: aaa.BB.bar: +# NO_SPLIT_FUNC-NEXT: xorb %al, 4 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: a.BB.bar: +# NO_SPLIT_FUNC-NEXT: xorb %al, 2 +# NO_SPLIT_FUNC-NEXT: jmp -16 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: foo: +# NO_SPLIT_FUNC-NEXT: xorb %al, 0 +# NO_SPLIT_FUNC-NEXT: int3 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: qux: +# NO_SPLIT_FUNC-NEXT: xorb %al, 6 +# NO_SPLIT_FUNC-NEXT: int3 +# NO_SPLIT_FUNC-EMPTY: +# NO_SPLIT_FUNC-NEXT: baz: +# NO_SPLIT_FUNC-NEXT: xorb %al, 5 +# + +## Check that the combination of no-reorder-blocks and split-funcs makes lld fail. +# RUN: not ld.lld %t.o -propeller=%t_prof.propeller -propeller-opt=no-reorder-blocks -propeller-opt=split-funcs 2>&1 -o /dev/null | FileCheck %s --check-prefix=FAIL +# FAIL: propeller: Inconsistent combination of propeller optimizations 'split-funcs' and 'no-reorder-blocks'. + + +# RUN: ld.lld %t.o -optimize-bb-jumps -propeller=%t_prof.propeller -propeller-opt=no-split-funcs -propeller-opt=no-reorder-funcs -propeller-keep-named-symbols -o %t.propeller.nosplitreorderfuncs.out +# RUN: llvm-objdump -d %t.propeller.nosplitreorderfuncs.out| FileCheck %s --check-prefix=NO_SPLIT_REORDER_FUNC +# +# NO_SPLIT_REORDER_FUNC: Disassembly of section .text: +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: foo: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 0 +# NO_SPLIT_REORDER_FUNC-NEXT: int3 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: bar: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 1 +# NO_SPLIT_REORDER_FUNC-NEXT: jne 14 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: aa.BB.bar: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 3 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: aaa.BB.bar: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 4 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: a.BB.bar: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 2 +# NO_SPLIT_REORDER_FUNC-NEXT: jmp -16 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: baz: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 5 +# NO_SPLIT_REORDER_FUNC-NEXT: int3 +# NO_SPLIT_REORDER_FUNC-EMPTY: +# NO_SPLIT_REORDER_FUNC-NEXT: qux: +# NO_SPLIT_REORDER_FUNC-NEXT: xorb %al, 6 + +.section .text,"ax",@progbits +# -- Begin function foo +.type foo,@function +.align 4 +foo: + xor %al,0 + +.Lfoo_end: + .size foo, .Lfoo_end-foo +# -- End function foo + +.section .text,"ax",@progbits,unique,1 +# -- Begin function bar +.type bar,@function +.align 4 +bar: + xor %al,1 + je aa.BB.bar + jmp a.BB.bar + +.section .text,"ax",@progbits,unique,2 +a.BB.bar: + xor %al,2 + jmp aaa.BB.bar +.La.BB.bar_end: + .size a.BB.bar, .La.BB.bar_end-a.BB.bar + +.section .text,"ax",@progbits,unique,3 +aa.BB.bar: + xor %al,3 + jmp aaa.BB.bar +.Laa.BB.bar_end: + .size aa.BB.bar, .Laa.BB.bar_end-aa.BB.bar + +.section .text,"ax",@progbits,unique,4 +aaa.BB.bar: + xor %al,4 +.Laaa.BB.bar_end: + .size aaa.BB.bar, .Laaa.BB.bar_end-aaa.BB.bar + +.section .text,"ax",@progbits,unique,1 +.Lbar_end: + .size bar, .Lbar_end-bar +# -- End function bar + +.section .text,"ax",@progbits,unique,5 +# -- Begin function baz +.type baz,@function +.align 4 +baz: + xor %al,5 + +.Lbaz_end: + .size baz, .Lbaz_end-baz +# -- End function baz + +.section .text,"ax",@progbits,unique,6 +# -- Begin function qux +.type qux,@function +.align 4 +qux: + xor %al,6 + +.Lqux_end: + .size qux, .Lqux_end-qux +# -- End function qux Index: lld/test/ELF/propeller/propeller-skip.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-skip.s @@ -0,0 +1,9 @@ +# REQUIRES: x86 +## Test lld honors "@" directive. The input propeller-3.data contains a +## "@" with an invalid output file name. ld.lld won't activate propeller +## because "-o" output name does not match what is following "@". + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller-3.data %t.o -o %t.out 2>&1 | FileCheck %s --check-prefix=CHECK + +# CHECK: warning: [Propeller]: Propeller skipped Index: lld/test/ELF/propeller/propeller-symbol-order-dump.s =================================================================== --- /dev/null +++ lld/test/ELF/propeller/propeller-symbol-order-dump.s @@ -0,0 +1,62 @@ +# REQUIRES: x86 +## Test dump symbol order works good. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -propeller=%S/Inputs/propeller.data -propeller-dump-symbol-order=%t2.symorder %t.o -o %t.out + +# RUN: cat %t2.symorder | FileCheck %s --check-prefix=SYMBOL_ORDER + +# SYMBOL_ORDER: main +# SYMBOL_ORDER: aa.BB.main +# SYMBOL_ORDER: aaa.BB.main +# SYMBOL_ORDER: a.BB.main +# SYMBOL_ORDER: compute_flag +# SYMBOL_ORDER: Hot +# SYMBOL_ORDER: aaaa.BB.main + +.text + .section .text.compute_flag,"ax",@progbits + .globl compute_flag + .type compute_flag,@function +compute_flag: + movslq %edi, %rcx + retq +.Lfunc_end0: + .size compute_flag, .Lfunc_end0-compute_flag + + .section .text.main,"ax",@progbits + .globl main + .type main,@function +main: + pushq %rbx + jmp a.BB.main # 0 -> 1 + + .section .text.main,"ax",@progbits,unique,1 +aaa.BB.main: + je aaaa.BB.main # 3 -> 4 + jmp a.BB.main # 3 -> 1 +.Ltmp0: + .size aaa.BB.main, .Ltmp0-aaa.BB.main + + .section .text.main,"ax",@progbits,unique,2 +a.BB.main: + je aaa.BB.main # 1 -> 3 + jmp aa.BB.main # 1 -> 2 +.Ltmp1: + .size a.BB.main, .Ltmp1-a.BB.main + + .section .text.main,"ax",@progbits,unique,3 +aa.BB.main: + jmp aaa.BB.main # 2 -> 3 +.Ltmp2: + .size aa.BB.main, .Ltmp2-aa.BB.main + + .section .text.main,"ax",@progbits,unique,4 +aaaa.BB.main: + retq +.Ltmp3: + .size aaaa.BB.main, .Ltmp3-aaaa.BB.main + + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main