Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -274,6 +274,10 @@ /// declared by users only. void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); +/// Check if we can use Comdat for profile variables. This will eliminate +/// the duplicated profile variables for Comdat functions. +bool needsComdatForCounter(const Function &F, const Module &M); + const std::error_category &instrprof_category(); enum class instrprof_error { Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -780,4 +781,29 @@ F.setMetadata(getPGOFuncNameMetadataName(), N); } +bool needsComdatForCounter(const Function &F, const Module &M) { + if (F.hasComdat()) + return true; + + Triple TT(M.getTargetTriple()); + if (!TT.isOSBinFormatELF()) + return false; + + // See createPGOFuncNameVar for more details. To avoid link errors, profile + // counters for function with available_externally linkage needs to be changed + // to linkonce linkage. On ELF based systems, this leads to weak symbols to be + // created. Without using comdat, duplicate entries won't be removed by the + // linker leading to increased data segement size and raw profile size. Even + // worse, since the referenced counter from profile per-function data object + // will be resolved to the common strong definition, the profile counts for + // available_externally functions will end up being duplicated in raw profile + // data. This can result in distorted profile as the counts of those dups + // will be accumulated by the profile merger. + GlobalValue::LinkageTypes Linkage = F.getLinkage(); + if (Linkage != GlobalValue::ExternalWeakLinkage && + Linkage != GlobalValue::AvailableExternallyLinkage) + return false; + + return true; +} } // end namespace llvm Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -268,33 +268,6 @@ return F->hasAddressTaken() || F->hasLinkOnceLinkage(); } -static inline bool needsComdatForCounter(Function &F, Module &M) { - - if (F.hasComdat()) - return true; - - Triple TT(M.getTargetTriple()); - if (!TT.isOSBinFormatELF()) - return false; - - // See createPGOFuncNameVar for more details. To avoid link errors, profile - // counters for function with available_externally linkage needs to be changed - // to linkonce linkage. On ELF based systems, this leads to weak symbols to be - // created. Without using comdat, duplicate entries won't be removed by the - // linker leading to increased data segement size and raw profile size. Even - // worse, since the referenced counter from profile per-function data object - // will be resolved to the common strong definition, the profile counts for - // available_externally functions will end up being duplicated in raw profile - // data. This can result in distorted profile as the counts of those dups - // will be accumulated by the profile merger. - GlobalValue::LinkageTypes Linkage = F.getLinkage(); - if (Linkage != GlobalValue::ExternalWeakLinkage && - Linkage != GlobalValue::AvailableExternallyLinkage) - return false; - - return true; -} - static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F, InstrProfIncrementInst *Inc) { if (!needsComdatForCounter(F, M)) Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/PGOInstrumentation.h" #include "CFGMST.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -59,6 +60,7 @@ #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -75,6 +77,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include +#include #include #include @@ -112,6 +115,13 @@ cl::desc("Max number of annotations for a single indirect " "call callsite")); +// Command line option to control appending FunctionHash to the name of a COMDAT +// function. This is to avoid the hash mismatch caused by the preinliner. +static cl::opt ComdatAppendHash( + "comdat-append-hash", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Append functin hash to the name of COMDAT function to avoid " + "function hash mismatch due to the preinliner")); + // Command line option to enable/disable the warning about missing profile // information. static cl::opt NoPGOWarnMissing("no-pgo-warn-missing", cl::init(false), @@ -238,6 +248,9 @@ private: Function &F; void computeCFGHash(); + void renameComdatFunction(); + // A map that stores the Comdat group in function F. + std::unordered_multimap &ComdatMembers; public: std::string FuncName; @@ -261,12 +274,17 @@ Twine(FunctionHash) + "\t" + Str); } - FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false, - BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), FunctionHash(0), MST(F, BPI, BFI) { + FuncPGOInstrumentation( + Function &Func, + std::unordered_multimap &ComdatMembers, + bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr) + : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0), + MST(F, BPI, BFI) { FuncName = getPGOFuncName(F); computeCFGHash(); + if (ComdatAppendHash && ComdatMembers.size()) + renameComdatFunction(); DEBUG(dumpInfo("after CFGMST")); NumOfPGOBB += MST.BBInfos.size(); @@ -299,7 +317,68 @@ } } JC.update(Indexes); - FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 | + (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); +} + +// Check if we can safely rename this Comdat function. +static bool canComdatRename( + Function &F, + std::unordered_multimap &ComdatMembers) { + if (F.getName().empty()) + return false; + if (!needsComdatForCounter(F, *(F.getParent()))) + return false; + // Only safe to do if this function may be discarded if it is not used + // in the compilation unit. + if (!GlobalValue::isDiscardableIfUnused(F.getLinkage())) + return false; + + // For AvailableExternallyLinkage functions. + if (!F.hasComdat()) + return true; + + // Only handle those Comdat groups that only containing one function (OK to + // have have other variables/aliases). + Comdat *C = F.getComdat(); + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { + if (!dyn_cast(CM.second)) + continue; + Function *FM = dyn_cast(CM.second); + if (FM != &F) + return false; + } + return true; +} + +// Append the CFGHash to the Comdat function name. +template +void FuncPGOInstrumentation::renameComdatFunction() { + if (!canComdatRename(F, ComdatMembers)) + return; + std::string NewFuncName = + Twine(F.getName() + "." + Twine(FunctionHash)).str(); + F.setName(Twine(NewFuncName)); + FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); + Comdat *NewComdat; + Module *M = F.getParent(); + // For AvailableExternallyLinkage functions, changed to Comdat. + if (!F.hasComdat()) { + NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); + F.setLinkage(GlobalValue::LinkOnceODRLinkage); + F.setComdat(NewComdat); + return; + } + + // This function belongs to a single function Comdat group. + Comdat *OrigComdat = F.getComdat(); + std::string NewComdatName = + Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); + NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); + NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); + + for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) + dyn_cast(CM.second)->setComdat(NewComdat); } // Given a CFG E to be instrumented, find which BB to place the instrumented @@ -340,16 +419,16 @@ // Visit all edge and instrument the edges not in MST, and do value profiling. // Critical edges will be split. -static void instrumentOneFunc(Function &F, Module *M, - BranchProbabilityInfo *BPI, - BlockFrequencyInfo *BFI) { +static void instrumentOneFunc( + Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, + std::unordered_multimap &ComdatMembers) { unsigned NumCounters = 0; - FuncPGOInstrumentation FuncInfo(F, true, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, + BFI); for (auto &E : FuncInfo.MST.AllEdges) { if (!E->InMST && !E->Removed) NumCounters++; } - uint32_t I = 0; Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); for (auto &E : FuncInfo.MST.AllEdges) { @@ -456,9 +535,11 @@ class PGOUseFunc { public: - PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr, + PGOUseFunc(Function &Func, Module *Modu, + std::unordered_multimap &ComdatMembers, + BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI), + : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), FreqAttr(FFA_Normal) {} // Read counts for the instrumented BB from profile. @@ -479,6 +560,8 @@ // Return the function hotness from the profile. FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } + // Return the function hash. + uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } // Return the profile record for this function; InstrProfRecord &getProfileRecord() { return ProfileRecord; } @@ -802,16 +885,37 @@ StringRef(INSTR_PROF_QUOTE(IR_LEVEL_PROF_VERSION_VAR)))); } +// Collect the set of members for each comdat.in module M and store +// in ComdatMembers. +static void collectComdatMembers( + Module &M, + std::unordered_multimap &ComdatMembers) { + if (!ComdatAppendHash) + return; + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); +} + static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, function_ref LookupBFI) { createIRLevelProfileFlagVariable(M); + std::unordered_multimap ComdatMembers; + collectComdatMembers(M, ComdatMembers); + for (auto &F : M) { if (F.isDeclaration()) continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); } return true; } @@ -877,14 +981,17 @@ return false; } + std::unordered_multimap ComdatMembers; + collectComdatMembers(M, ComdatMembers); std::vector HotFunctions; std::vector ColdFunctions; + InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); for (auto &F : M) { if (F.isDeclaration()) continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - PGOUseFunc Func(F, &M, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); if (!Func.readCounters(PGOReader.get())) continue; Func.populateCounters(); @@ -910,7 +1017,6 @@ F->addFnAttr(llvm::Attribute::Cold); DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); } - return true; } Index: test/Transforms/PGOProfile/Inputs/indirect_call.proftext =================================================================== --- test/Transforms/PGOProfile/Inputs/indirect_call.proftext +++ test/Transforms/PGOProfile/Inputs/indirect_call.proftext @@ -1,7 +1,7 @@ :ir bar # Func Hash: -12884901887 +281487861612543 # Num Counters: 1 # Counter Values: Index: test/Transforms/PGOProfile/comdat_internal.ll =================================================================== --- test/Transforms/PGOProfile/comdat_internal.ll +++ test/Transforms/PGOProfile/comdat_internal.ll @@ -4,19 +4,19 @@ target triple = "x86_64-unknown-linux-gnu" $foo = comdat any +; CHECK: $foo.12884901887 = comdat any ; CHECK: $__llvm_profile_raw_version = comdat any -; CHECK: $__profv__stdin__foo = comdat any +; CHECK: $__profv__stdin__foo.12884901887 = comdat any @bar = global i32 ()* @foo, align 8 ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat -; CHECK: @__profn__stdin__foo = private constant [11 x i8] c":foo" -; CHECK: @__profc__stdin__foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo), align 8 -; CHECK: @__profd__stdin__foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo, i32 0, i32 0), i8* +; CHECK: @__profn__stdin__foo.12884901887 = private constant [23 x i8] c":foo.12884901887" +; CHECK: @__profc__stdin__foo.12884901887 = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.12884901887), align 8 +; CHECK: @__profd__stdin__foo.12884901887 = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6965568665848889497, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.12884901887, i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) -; CHECK-SAME: null -; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo), align 8 +; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.12884901887), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.used Index: test/Transforms/PGOProfile/indirect_call_profile.ll =================================================================== --- test/Transforms/PGOProfile/indirect_call_profile.ll +++ test/Transforms/PGOProfile/indirect_call_profile.ll @@ -13,10 +13,10 @@ define void @foo() { entry: ; GEN: entry: -; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0) +; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 281487861612543, i32 1, i32 0) %tmp = load void ()*, void ()** @bar, align 8 ; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64 -; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i64 [[ICALL_TARGET]], i32 0, i32 0) +; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 281487861612543, i64 [[ICALL_TARGET]], i32 0, i32 0) call void %tmp() ret void } @@ -30,7 +30,7 @@ invoke void %tmp2() to label %bb10 unwind label %bb2 ; GEN: [[ICALL_TARGET2:%[0-9]+]] = ptrtoint void ()* %tmp2 to i64 -; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 38432627612, i64 [[ICALL_TARGET2]], i32 0, i32 0) +; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 281513409338268, i64 [[ICALL_TARGET2]], i32 0, i32 0) bb2: ; preds = %bb %tmp3 = landingpad { i8*, i32 } @@ -54,7 +54,7 @@ } ; Test that comdat function's address is recorded. -; LOWER: @__profd_foo3 = linkonce_odr{{.*}}@foo3 +; LOWER: @__profd_foo3.12884901887 = linkonce_odr{{.*}}@foo3.12884901887 ; Function Attrs: nounwind uwtable define linkonce_odr i32 @foo3() comdat { ret i32 1