diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -222,7 +222,7 @@ continue; if (config->saveTemps) - saveBuffer(buf[i], ltoObjName); + saveBuffer(objBuf, ltoObjName); ret.push_back(make(MemoryBufferRef(objBuf, ltoObjName))); } diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1143,8 +1143,8 @@ // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. -void createIRLevelProfileFlagVar(Module &M, bool IsCS, - bool InstrEntryBBEnabled); +void createIRLevelProfileFlagVar(Module &M, bool IsCS, bool InstrEntryBBEnabled, + bool IsPrivate = false); // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1099,7 +1099,7 @@ auto IRInstrVar = M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); if (!IRInstrVar || IRInstrVar->isDeclaration() || - IRInstrVar->hasLocalLinkage()) + (IRInstrVar->hasLocalLinkage() && !IRInstrVar->hasPrivateLinkage())) return false; // Check if the flag is set. @@ -1137,8 +1137,8 @@ // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. 
-void createIRLevelProfileFlagVar(Module &M, bool IsCS, - bool InstrEntryBBEnabled) { +void createIRLevelProfileFlagVar(Module &M, bool IsCS, bool InstrEntryBBEnabled, + bool IsPrivate) { const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); Type *IntTy64 = Type::getInt64Ty(M.getContext()); uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); @@ -1151,7 +1151,9 @@ Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); Triple TT(M.getTargetTriple()); - if (TT.supportsCOMDAT()) { + if (IsPrivate) { + IRLevelVersionVariable->setLinkage(GlobalValue::PrivateLinkage); + } else if (TT.supportsCOMDAT()) { IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1609,8 +1609,13 @@ function_ref LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. - if (!IsCS) - createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); + // During LTO/ThinLTO linking, a non-prevailing IRPGOFlag variable is dropped + // for opt. In that case, create the symbol with private linkage for opt; it + // should be eliminated later, which avoids duplicate symbols. 
+ if (!IsCS || !isIRPGOFlagSet(&M)) + createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry, + /* IsPrivate */ IsCS); + std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); diff --git a/llvm/test/Transforms/PGOProfile/Inputs/hybrid_thinlto_cspgo_gen.ll b/llvm/test/Transforms/PGOProfile/Inputs/hybrid_thinlto_cspgo_gen.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/hybrid_thinlto_cspgo_gen.ll @@ -0,0 +1,44 @@ +; ModuleID = 'test-v1.bc' +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.Child = type { %class.Base.base, [4 x i8] } +%class.Base.base = type <{ i32 (...)**, i32 }> + +$__llvm_profile_filename = comdat any + +$__llvm_profile_raw_version = comdat any + +@_ZTV5Child = external unnamed_addr constant { [3 x i8*] }, align 8 +@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external dso_local global i8* +@_ZTS5Child = dso_local constant [7 x i8] c"5Child\00", align 1 +@_ZTI4Base = external dso_local constant i8* +@_ZTI5Child = dso_local constant { i8*, i8*, i32, i32, i8*, i64 } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @_ZTS5Child, i32 0, i32 0), i32 0, i32 1, i8* bitcast (i8** @_ZTI4Base to i8*), i64 0 }, align 8 +@__llvm_profile_filename = local_unnamed_addr constant [19 x i8] c"default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783814, comdat + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn +define dso_local i32 @_ZN5Child3getEv(%class.Child* nocapture readnone align 8 %this) unnamed_addr #0 align 2 { +entry: + ret i32 1 +} + +attributes #0 = { "target-cpu"="x86-64" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} 
+!2 = !{i32 1, !"EnableSplitLTOUnit", i32 1} + +^0 = module: (path: "test-v1.bc", hash: (1837240108, 3253769035, 1388040350, 3007262886, 1978890690)) +^1 = gv: (name: "__llvm_profile_raw_version", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 1)))) ; guid = 3271499267116101737 +^2 = gv: (name: "_ZTV5Child") ; guid = 5226103449856009057 +^3 = gv: (name: "_ZN5Child3getEv", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0)))) ; guid = 7125659195073751167 +^4 = gv: (name: "_ZTS5Child", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 0, constant: 1)))) ; guid = 7547014829888929871 +^5 = gv: (name: "_ZTI5Child", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 0, constant: 1), refs: (^7, ^4, ^6)))) ; guid = 12661005278488252785 +^6 = gv: (name: "_ZTI4Base") ; guid = 13357340906568957678 +^7 = gv: (name: "_ZTVN10__cxxabiv121__vmi_class_type_infoE") ; guid = 15154540976975978366 +^8 = gv: (name: "__llvm_profile_filename", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 1)))) ; guid = 17292341786426384195 +^9 = flags: 8 +^10 = blockcount: 1 diff --git a/llvm/test/Transforms/PGOProfile/hybrid_lto_cspgo_gen.ll b/llvm/test/Transforms/PGOProfile/hybrid_lto_cspgo_gen.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/PGOProfile/hybrid_lto_cspgo_gen.ll @@ -0,0 +1,45 @@ +; REQUIRES: x86-registered-target + +; RUN: opt -module-summary %S/Inputs/hybrid_thinlto_cspgo_gen.ll -o %t1.bc +; RUN: opt -module-summary %s -o %t2.bc + +;; Test IRPGO is set and profiling variables will contain a FuncHash suffix. +; RUN: ld.lld --lto-cs-profile-file=alloc --lto-cs-profile-generate --lto-O2 --save-temps --shared -o %t %t1.bc %t2.bc +; RUN: llvm-dis %t.0.4.opt.bc -o - | FileCheck %s --check-prefix=ENABLE-IRPGO + +; ENABLE-IRPGO: $__profc__ZN5Child3getEv.[[HASH:[0-9]+]] + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.Child.1 = type { %class.Base.base.0, [4 x i8] } +%class.Base.base.0 = type <{ i32 (...)**, i32 }> + +@_ZTV5Child = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64 }* @_ZTI5Child to i8*), i8* bitcast (i32 (%class.Child.1*)* @_ZN5Child3getEv to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !3 +@_ZTI5Child = external dso_local constant { i8*, i8*, i32, i32, i8*, i64 }, align 8 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn +define available_externally dso_local i32 @_ZN5Child3getEv(%class.Child.1* nocapture readnone align 8 %this) unnamed_addr #0 align 2 { +entry: + ret i32 1 +} + +attributes #0 = { "target-cpu"="x86-64" } + +!llvm.module.flags = !{!4, !5, !6, !7} + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"} +!2 = !{i64 16, !"_ZTS5Child"} +!3 = !{i64 16, !"_ZTSM5ChildFivE.virtual"} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{i32 1, !"EnableSplitLTOUnit", i32 1} +!7 = !{i32 1, !"ThinLTO", i32 0} + +^0 = module: (path: "test-v1.bc", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "_ZTV5Child", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, 
notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 0, constant: 1), refs: (^3, ^2)))) ; guid = 5226103449856009057 +^2 = gv: (name: "_ZN5Child3getEv", summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0)))) ; guid = 7125659195073751167 +^3 = gv: (name: "_ZTI5Child") ; guid = 12661005278488252785 +^4 = flags: 8 +^5 = blockcount: 1