Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -42,8 +42,18 @@ #include #include +namespace Opt { +enum MergeLevelEnum { none, size, all }; +} + namespace llvm { +// This is for now until I figure out a better place to put this declaration in. +// Compute a similarity metric for function F. Useful for merging functions. +unsigned profileFunction(const Function *F); +bool isComparisonCandidate(const Function *F); +bool isAliasCapable(const Function* G); + namespace yaml { template struct MappingTraits; @@ -499,7 +509,7 @@ std::vector(), std::vector(), std::vector(), - std::vector()); + std::vector(), 0); } /// A dummy node to reference external functions that aren't in the index @@ -518,6 +528,7 @@ std::vector CallGraphEdgeList; std::unique_ptr TIdInfo; + unsigned SimilarityHash; public: FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags, @@ -526,10 +537,12 @@ std::vector TypeTestAssumeVCalls, std::vector TypeCheckedLoadVCalls, std::vector TypeTestAssumeConstVCalls, - std::vector TypeCheckedLoadConstVCalls) + std::vector TypeCheckedLoadConstVCalls, + unsigned SimHash) : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)), InstCount(NumInsts), FunFlags(FunFlags), - CallGraphEdgeList(std::move(CGEdges)) { + CallGraphEdgeList(std::move(CGEdges)), + SimilarityHash(SimHash) { if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) @@ -545,6 +558,9 @@ return GVS->getSummaryKind() == FunctionKind; } + /// Get the SimilarityHash of this function. + unsigned similarityHash() { return SimilarityHash; } + /// Get function attribute flags. FFlags fflags() const { return FunFlags; } Index: include/llvm/IR/ModuleSummaryIndexYAML.h =================================================================== --- include/llvm/IR/ModuleSummaryIndexYAML.h +++ include/llvm/IR/ModuleSummaryIndexYAML.h @@ -229,7 +229,7 @@ std::move(FSum.TypeTestAssumeVCalls), std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), - std::move(FSum.TypeCheckedLoadConstVCalls))); + std::move(FSum.TypeCheckedLoadConstVCalls), 0)); } } static void output(IO &io, GlobalValueSummaryMapTy &V) { Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -428,6 +428,9 @@ F.isVarArg() || // Don't try to import functions with noinline attribute. F.getAttributes().hasFnAttribute(Attribute::NoInline); + // SimHash of 0 means just a placeholder. + unsigned SimHash = llvm::profileFunction(&F); + GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, /* Live = */ false, F.isDSOLocal()); FunctionSummary::FFlags FunFlags{ @@ -441,7 +444,7 @@ CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), - TypeCheckedLoadConstVCalls.takeVector()); + TypeCheckedLoadConstVCalls.takeVector(), SimHash); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); Index.addGlobalValueSummary(F, std::move(FuncSummary)); @@ -554,7 +557,7 @@ ArrayRef{}, ArrayRef{}, ArrayRef{}, - ArrayRef{}); + ArrayRef{}, 0); Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr Summary = Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -7601,7 +7601,7 @@ std::move(TypeIdInfo.TypeTestAssumeVCalls), std::move(TypeIdInfo.TypeCheckedLoadVCalls), std::move(TypeIdInfo.TypeTestAssumeConstVCalls), - std::move(TypeIdInfo.TypeCheckedLoadConstVCalls)); + std::move(TypeIdInfo.TypeCheckedLoadConstVCalls), 0); FS->setModulePath(ModulePath); Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5251,9 +5251,9 @@ std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } - // FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs, + // FS_PERMODULE: [valueid, flags, instcount, fflags, simhash, numrefs, // numrefs x valueid, n x (valueid)] - // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs, + // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, simhash, numrefs, // numrefs x valueid, // n x (valueid, hotness)] // FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs, @@ -5266,12 +5266,14 @@ uint64_t RawFlags = Record[1]; unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; + unsigned SimHash = 0; unsigned NumRefs = Record[3]; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; - NumRefs = Record[4]; - RefListStartIndex = 5; + SimHash = Record[4]; + NumRefs = Record[5]; + RefListStartIndex = 6; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5296,7 +5298,7 @@ std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), - std::move(PendingTypeCheckedLoadConstVCalls)); + std::move(PendingTypeCheckedLoadConstVCalls), SimHash); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); @@ -5352,9 +5354,9 @@ TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; } - // FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs, + // FS_COMBINED: [valueid, modid, flags, instcount, fflags, simhash, numrefs, // numrefs x valueid, n x (valueid)] - // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs, + // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, simhash, numrefs, // numrefs x valueid, n x (valueid, hotness)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { @@ -5363,13 +5365,15 @@ uint64_t RawFlags = Record[2]; unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; + unsigned SimHash = 0; unsigned NumRefs = Record[4]; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; - NumRefs = Record[5]; - RefListStartIndex = 6; + SimHash = Record[5]; + NumRefs = Record[6]; + RefListStartIndex = 7; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5389,7 +5393,7 @@ std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), - std::move(PendingTypeCheckedLoadConstVCalls)); + std::move(PendingTypeCheckedLoadConstVCalls), SimHash); PendingTypeTests.clear(); PendingTypeTestAssumeVCalls.clear(); PendingTypeCheckedLoadVCalls.clear(); Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3487,6 +3487,7 @@ NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); + NameVals.push_back(FS->similarityHash()); NameVals.push_back(FS->refs().size()); for (auto &RI : FS->refs()) @@ -3579,6 +3580,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // simhash Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); @@ -3595,6 +3597,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // simhash Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid [, rel_block_freq]) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); @@ -3689,6 +3692,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // simhash Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); @@ -3703,6 +3707,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // simhash Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); @@ -3800,6 +3805,7 @@ NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); + NameVals.push_back(FS->similarityHash()); // Fill in below NameVals.push_back(0); Index: lib/Transforms/IPO/MergeSimilarFunctions.cpp =================================================================== --- lib/Transforms/IPO/MergeSimilarFunctions.cpp +++ lib/Transforms/IPO/MergeSimilarFunctions.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" @@ -54,10 +55,6 @@ STATISTIC(NumSimilarFunctionsMerged, "Number of similar functions merged"); -static cl::opt MergeMinInsts( - "mergesimilarfunc-min-insts", cl::Hidden, cl::init(4), - cl::desc("Min instructions required to even consider single block fns")); - static cl::opt MergeDifferingMinInsts( "mergesimilarfunc-diff-min-insts", cl::Hidden, cl::init(15), cl::desc("Min instructions required to try merging differing functions")); @@ -73,9 +70,8 @@ static cl::opt OptPrintMerges("mergesimilarfunc-print-merges", cl::Hidden, cl::init(false)); -static cl::opt UseGlobalAliases( - "mergesimilarfunc-global-aliases", cl::Hidden, cl::init(false), - cl::desc("Enable writing alias by enabling global aliases")); +extern cl::opt UseGlobalAliases; +extern cl::opt MergeMinInsts; void PrintMerges(const char *Desc, Function *Old, Function *New) { if (OptPrintMerges) { @@ -84,20 +80,16 @@ } } -// Minimize the name pollution caused by the enum values. namespace Opt { -enum MergeLevelEnum { none, size, all }; -static cl::opt MergeLevel( - "mergesimilarfunc-level", cl::Hidden, cl::ZeroOrMore, - cl::desc("Level of function merging:"), cl::init(size), - cl::values(clEnumVal(none, "function merging disabled"), - clEnumVal(size, "only try to merge functions that are optimized " - "for size"), - clEnumVal(all, "attempt to merge all similar functions"))); + extern cl::opt MergeLevel; } static const char *MERGED_SUFFIX = "__merged"; +namespace llvm { +unsigned profileFunction(const Function *F); +} +/* /// Returns the type id for a type to be hashed. We turn pointer types into /// integers here because the actual compare logic below considers pointers and /// integers of the same size as equal. @@ -110,7 +102,8 @@ /// Creates a hash-code for the function which is the same for any two /// functions that will compare equal, without looking at the instructions /// inside the function. -static unsigned profileFunction(const Function *F) { +namespace llvm { +unsigned profileFunction(const Function *F) { FunctionType *FTy = F->getFunctionType(); FoldingSetNodeID ID; @@ -124,7 +117,7 @@ ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); return ID.ComputeHash(); } - +}*/ /// Replace Inst1 by a switch statement that executes Inst1 or one of Inst2s /// depending on the value of SwitchVal. If a value in Inst2s is NULL, it @@ -1120,35 +1113,6 @@ FunctionsToCompare.clear(); } -static bool isAliasCapable(Function* G) { - return - UseGlobalAliases && G->hasGlobalUnnamedAddr() - && (G->hasExternalLinkage() || G->hasLocalLinkage() || G->hasWeakLinkage()); -} - -static bool isComparisonCandidate(Function *F) { - if (Opt::MergeLevel == Opt::size) { - // Only consider functions that are to be optimized for size. - // By default, that is all functions at -Os/-Oz and nothing at -O2. - bool Os = F->getAttributes(). - hasAttribute(AttributeList::FunctionIndex, Attribute::OptimizeForSize); - bool Oz = F->getAttributes(). - hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize); - if (!Os && !Oz) - return false; - } - - // Ignore declarations and tiny functions - no point in merging those - if (F->isDeclaration()) return false; - if (F->getName().endswith(MERGED_SUFFIX)) return false; - if (F->hasAvailableExternallyLinkage()) return false; - if (F->hasFnAttribute(Attribute::AlwaysInline)) return false; - if (F->size() == 1 && F->begin()->size() < MergeMinInsts) - return isAliasCapable(F); - - return true; -} - void MergeRegistry::defer(Function *F) { if (isComparisonCandidate(F)) Deferred.push_back(F); @@ -1168,9 +1132,11 @@ Function *F = dyn_cast_or_null(V); if (!F) continue; if (InsertedFuncs.find(F) != InsertedFuncs.end()) continue; - if (!isComparisonCandidate(F)) continue; unsigned Hash = profileFunction(F); + // Hash code of 0 is just a placeholder. + if (Hash == 0) + continue; FunctionsToCompare[Hash].push_front(F); InsertedFuncs.insert(F); @@ -1200,10 +1166,11 @@ void MergeRegistry::remove(Function *F, bool Reanalyze/*=true*/) { // There is no need to remove a function that is not already // in a bucket. - if (!isComparisonCandidate(F)) + unsigned Hash = profileFunction(F); + // Hash code of 0 is just a placeholder. + if (Hash == 0) return; - unsigned Hash = profileFunction(F); std::list &Bucket = FunctionsToCompare[Hash]; removeFromBucket(F, Bucket); @@ -1259,7 +1226,7 @@ class MergeSimilarFunctions : public ModulePass { public: - static char ID; + static char ID; MergeSimilarFunctions() : ModulePass(ID) { initializeMergeSimilarFunctionsPass(*PassRegistry::getPassRegistry()); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -464,6 +464,11 @@ MPM.add(createGlobalDCEPass()); } + if (EnableMergeSimilarFunctions) { + auto *Summary = (ImportSummary ? ImportSummary : ExportSummary); + MPM.add(createMergeSimilarFunctionsPass(Summary)); + } + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); // Rename anon globals to be able to export them in the summary. @@ -731,8 +736,10 @@ if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - if (EnableMergeSimilarFunctions) - MPM.add(createMergeSimilarFunctionsPass()); + if (EnableMergeSimilarFunctions) { + auto *Summary = (ImportSummary ? ImportSummary : ExportSummary); + MPM.add(createMergeSimilarFunctionsPass(Summary)); + } // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, @@ -923,8 +930,10 @@ // currently it damages debug info. if (MergeFunctions) PM.add(createMergeFunctionsPass()); - if (EnableMergeSimilarFunctions) - PM.add(createMergeSimilarFunctionsPass()); + if (EnableMergeSimilarFunctions) { + auto *Summary = (ImportSummary ? ImportSummary : ExportSummary); + PM.add(createMergeSimilarFunctionsPass(Summary)); + } } void PassManagerBuilder::populateThinLTOPassManager( Index: test/Transforms/MergeSimilarFunc/merge-debug-info.ll =================================================================== --- test/Transforms/MergeSimilarFunc/merge-debug-info.ll +++ test/Transforms/MergeSimilarFunc/merge-debug-info.ll @@ -163,14 +163,14 @@ !10 = !DIDerivedType(tag: DW_TAG_member, name: "set", scope: !6, file: !1, line: 6, baseType: !11, size: 32, align: 32, offset: 32) !11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !12 = !DIDerivedType(tag: DW_TAG_member, name: "get", scope: !6, file: !1, line: 7, baseType: !11, size: 32, align: 32, offset: 64) -!14 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !15, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17) +!14 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !15, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0) !15 = !DISubroutineType(types: !16) !16 = !{!11, !9} !17 = !{!18, !19, !20} !18 = !DILocalVariable(name: "b", arg: 1, scope: !14, file: !1, line: 10, type: !9) !19 = !DILocalVariable(name: "res", scope: !14, file: !1, line: 11, type: !11) !20 = !DILocalVariable(name: "ee", scope: !14, file: !1, line: 12, type: !4) -!21 = distinct !DISubprogram(name: "bar1", scope: !1, file: !1, line: 24, type: !15, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !22) +!21 = distinct !DISubprogram(name: "bar1", scope: !1, file: !1, line: 24, type: !15, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: true, unit: !0) !22 = !{!23, !24, !25} !23 = !DILocalVariable(name: "b", arg: 1, scope: !21, file: !1, line: 24, type: !9) !24 = !DILocalVariable(name: "res", scope: !21, file: !1, line: 25, type: !11)