diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -988,6 +988,20 @@
   StringSaver Saver;
   BumpPtrAllocator Alloc;
 
+  /// Functions with hash code as a measure of structural similarity.
+  /// Used by MergeSimilarFunctions. The value_type is the hash code
+  /// computed by profileFunction.
+  std::map FunctionSimilarityHashes;
+  /// Reverse map of FunctionSimilarityHashes, to be used during ThinLTO.
+  /// Each GUID in the vector corresponds to functions with same hash (id).
+  std::map> SimilarFunctions;
+  /// Functions having multiple entries in ModuleSummaryIndex.
+  std::set DuplicateFunctions;
+  /// All similar functions w.r.t the one in this set will be imported into
+  /// the module of these functions during thin-lto stage.
+  /// FIXME: Maybe use a vector?
+  std::set HostSimilarFunction;
+
   // YAML I/O support.
   friend yaml::MappingTraits;
 
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -281,6 +281,9 @@
 ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
                                           raw_ostream *ThinLinkOS = nullptr);
 
+/// Decide which functions to import for function merging during the thinlto.
+/// Populates information from each function summary to module summary index.
+void computeMergeSimilarFunctions(ModuleSummaryIndex &Index);
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -253,7 +253,6 @@
     ModuleSummaryIndex &Summary,
     StringMap &ExportLists,
     std::map> &LocalWPDTargetsMap);
-
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -77,6 +77,8 @@
 extern cl::opt RemarksFormat;
 }
 
+extern cl::opt<bool> EnableMergeSimilarFunctions;
+
 namespace {
 
 static cl::opt
@@ -919,6 +921,8 @@
 
   // Sequential linking phase
   auto Index = linkCombinedIndex();
+  if (EnableMergeSimilarFunctions)
+    computeMergeSimilarFunctions(*Index);
 
   // Save temps: index.
   if (!SaveTempsDir.empty()) {
diff --git a/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp b/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp
--- a/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeSimilarFunctions.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -1300,6 +1301,44 @@
   return new MergeSimilarFunctions(S);
 }
 
+void llvm::computeMergeSimilarFunctions(ModuleSummaryIndex &Index) {
+  assert(Index.getSimilarFunctions().empty() && "Inserting SimHash twice");
+  // Feed the similarity hash of every function summary into the index.
+  for (auto &P : Index) {
+    for (auto &S : P.second.SummaryList) {
+      FunctionSummary *FS = dyn_cast<FunctionSummary>(S.get());
+      if (!FS)
+        continue;
+      LLVM_DEBUG(llvm::errs() << "\nSimilarity hash: " << FS->similarityHash());
+      Index.addToSimilarFunctions(FS->similarityHash(), FS->getOriginalName());
+    }
+  }
+  Index.populateReverseSimilarityHashMap();
+  // Map sizes before removeSingleEntriesFromSimHashMaps(); debug-only output.
+  LLVM_DEBUG(llvm::errs() << "\nSize SimilarFunctionsHash: "
+                          << Index.getSimilarFunctionsHash().size());
+  LLVM_DEBUG(llvm::errs() << "\nSize SimilarFunctions: "
+                          << Index.getSimilarFunctions().size());
+  Index.removeSingleEntriesFromSimHashMaps();
+
+  // Map sizes after removeSingleEntriesFromSimHashMaps().
+  LLVM_DEBUG(llvm::errs() << "\nSize SimilarFunctionsHash: "
+                          << Index.getSimilarFunctionsHash().size());
+  LLVM_DEBUG(llvm::errs() << "\nSize SimilarFunctions: "
+                          << Index.getSimilarFunctions().size());
+
+  auto &SimFunctions = Index.getSimilarFunctions();
+  // Shouldn't have entries with hash of 0, because those are placeholders.
+  assert(!SimFunctions.count(0));
+  auto &HostSimFunction = Index.getHostSimilarFunction();
+  for (const auto &Entry : SimFunctions) {
+    // Make the first of all similar functions as host.
+    HostSimFunction.insert(Entry.second[0]);
+  }
+  LLVM_DEBUG(llvm::errs() << "\nSize getHostSimilarFunction: "
+                          << Index.getHostSimilarFunction().size());
+}
+
 bool MergeSimilarFunctions::runOnModule(Module &M) {
   if (Opt::MergeLevel == Opt::none)
     return false;
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -147,7 +147,7 @@
     "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
     cl::desc("Enable order file instrumentation (default = off)"));
 
-static cl::opt<bool> EnableMergeSimilarFunctions(
+cl::opt<bool> EnableMergeSimilarFunctions(
     "enable-merge-sim-functions", cl::init(false), cl::Hidden,
-    cl::desc("Enable the Function merging pass (default = on)"));
+    cl::desc("Enable the Function merging pass (default = off)"));
 