Index: include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- include/llvm/IR/ModuleSummaryIndex.h
+++ include/llvm/IR/ModuleSummaryIndex.h
@@ -404,6 +404,13 @@
   /// call edge pair.
   using EdgeTy = std::pair<ValueInfo, CalleeInfo>;
 
+  /// Types for -force-summary-edges-cold debugging option.
+  enum ForceSummaryHotnessType : unsigned {
+    FSHT_None,           ///< Do not override computed edge hotness.
+    FSHT_AllNonCritical, ///< Force edges cold, except critical ones.
+    FSHT_All             ///< Force every edge cold, critical ones included.
+  };
+
   /// An "identifier" for a virtual function. This contains the type identifier
   /// represented as a GUID and the offset from the address point to the virtual
   /// function pointer, where "address point" is as defined in the Itanium ABI:
Index: lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- lib/Analysis/ModuleSummaryAnalysis.cpp
+++ lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -49,6 +49,7 @@
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -58,6 +59,18 @@
 
 #define DEBUG_TYPE "module-summary-analysis"
 
+// Option to force edges cold which will block importing when the
+// -import-cold-multiplier is set to 0. Useful for debugging.
+FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
+    FunctionSummary::FSHT_None;
+cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
+    "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
+    cl::desc("Force all edges in the function summary to cold"),
+    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
+               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
+                          "all-non-critical", "All non-critical edges."),
+               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));
+
 // Walk through the operands of a given User via worklist iteration and populate
 // the set of GlobalValue references encountered. Invoked either on an
 // Instruction or a GlobalVariable (which walks its initializer).
@@ -268,6 +281,8 @@
         auto ScaledCount = PSI->getProfileCount(&I, BFI);
         auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
                                    : CalleeInfo::HotnessType::Unknown;
+        if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
+          Hotness = CalleeInfo::HotnessType::Cold;
 
         // Use the original CalledValue, in case it was an alias. We want
         // to record the call edge to the alias in that case. Eventually
@@ -318,7 +333,9 @@
   // sample PGO, to enable the same inlines as the profiled optimized binary.
   for (auto &I : F.getImportGUIDs())
     CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
-        CalleeInfo::HotnessType::Critical);
+        ForceSummaryEdgesCold == FunctionSummary::FSHT_All
+            ? CalleeInfo::HotnessType::Cold
+            : CalleeInfo::HotnessType::Critical);
 
   bool NonRenamableLocal = isNonRenamableLocal(F);
   bool NotEligibleForImport =
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -89,6 +89,9 @@
 cl::opt<bool> WriteRelBFToSummary(
     "write-relbf-to-summary", cl::Hidden, cl::init(false),
     cl::desc("Write relative block frequency to function summary "));
+
+extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
+
 namespace {
 
 /// These are manifest constants used by the bitcode writer. They do not need to
@@ -3436,7 +3439,8 @@
   for (auto &RI : FS->refs())
     NameVals.push_back(VE.getValueID(RI.getValue()));
 
-  bool HasProfileData = F.hasProfileData();
+  bool HasProfileData =
+      F.hasProfileData() || ForceSummaryEdgesCold != FunctionSummary::FSHT_None;
   for (auto &ECI : FS->calls()) {
     NameVals.push_back(getValueId(ECI.first));
     if (HasProfileData)
Index: test/Transforms/FunctionImport/Inputs/funcimport_forcecold.ll
===================================================================
--- /dev/null
+++ test/Transforms/FunctionImport/Inputs/funcimport_forcecold.ll
@@ -0,0 +1,4 @@
+define void @foo() {
+entry:
+  ret void
+}
Index: test/Transforms/FunctionImport/funcimport_forcecold.ll
===================================================================
--- /dev/null
+++ test/Transforms/FunctionImport/funcimport_forcecold.ll
@@ -0,0 +1,36 @@
+; Test to ensure that building summary with -force-summary-edges-cold
+; blocks importing as expected.
+
+; "-stats" and "-debug-only" require +Asserts.
+; REQUIRES: asserts
+
+; First do with default options, which should import
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/funcimport_forcecold.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=IMPORT
+
+; Next rebuild caller module summary with non-critical edges forced cold (which
+; should affect all edges in this test as we don't have any sample pgo).
+; Make sure we don't import.
+; RUN: opt -force-summary-edges-cold=all-non-critical -module-summary %s -o %t.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=NOIMPORT
+
+; Next rebuild caller module summary with all edges forced cold.
+; Make sure we don't import.
+; RUN: opt -force-summary-edges-cold=all -module-summary %s -o %t.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=NOIMPORT
+
+define i32 @main() {
+entry:
+  call void @foo()
+  ret i32 0
+}
+
+; IMPORT: Import foo
+; NOIMPORT-NOT: Import foo
+; IMPORT: define available_externally void @foo()
+; NOIMPORT: declare void @foo()
+declare void @foo()
Index: test/Transforms/FunctionImport/funcimport_forcecold_samplepgo.ll
===================================================================
--- /dev/null
+++ test/Transforms/FunctionImport/funcimport_forcecold_samplepgo.ll
@@ -0,0 +1,37 @@
+; Test to ensure that building summary with -force-summary-edges-cold
+; blocks importing as expected.
+
+; "-stats" and "-debug-only" require +Asserts.
+; REQUIRES: asserts
+
+; First do with default options, which should import
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/funcimport_forcecold.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=IMPORT
+
+; Next rebuild caller module summary with only non-critical edges forced cold,
+; which should still import in this case.
+; RUN: opt -force-summary-edges-cold=all-non-critical -module-summary %s -o %t.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=IMPORT
+
+; Next rebuild caller module summary with all edges forced cold.
+; Make sure we don't import.
+; RUN: opt -force-summary-edges-cold=all -module-summary %s -o %t.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -function-import -stats -print-imports -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=NOIMPORT
+
+define i32 @main() !prof !1 {
+entry:
+  call void @foo()
+  ret i32 0
+}
+
+; IMPORT: Import foo
+; NOIMPORT-NOT: Import foo
+; IMPORT: define available_externally void @foo()
+; NOIMPORT: declare void @foo()
+declare void @foo()
+
+!1 = !{!"function_entry_count", i64 110, i64 6699318081062747564}