diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -615,6 +615,9 @@ Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; Options.Atomic = CodeGenOpts.AtomicProfileUpdate; + std::pair<int, int> BinutilsVersion = + llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion); + Options.CounterLinkOrder = BinutilsVersion >= std::make_pair(2, 36); return Options; } diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -22,7 +22,7 @@ # Use LLVM utils and Clang from the same build tree. list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS clang clang-resource-headers FileCheck count not llvm-config llvm-nm llvm-objdump - llvm-readobj llvm-symbolizer compiler-rt-headers sancov) + llvm-readelf llvm-readobj llvm-symbolizer compiler-rt-headers sancov) if (WIN32) list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS KillTheDoctor) endif() diff --git a/compiler-rt/test/profile/CMakeLists.txt b/compiler-rt/test/profile/CMakeLists.txt --- a/compiler-rt/test/profile/CMakeLists.txt +++ b/compiler-rt/test/profile/CMakeLists.txt @@ -5,6 +5,9 @@ set(PROFILE_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) if(NOT COMPILER_RT_STANDALONE_BUILD) list(APPEND PROFILE_TEST_DEPS profile llvm-profdata llvm-cov) + if(NOT APPLE AND COMPILER_RT_HAS_LLD AND TARGET lld) + list(APPEND PROFILE_TEST_DEPS lld) + endif() endif() set(PROFILE_TEST_ARCH ${PROFILE_SUPPORTED_ARCH}) diff --git a/compiler-rt/test/profile/instrprof-gc-sections.c b/compiler-rt/test/profile/instrprof-gc-sections.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/profile/instrprof-gc-sections.c @@ -0,0 +1,91 @@ +// REQUIRES: linux, lld-available + +// RUN: %clang_profgen=%t.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -mllvm 
-enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -o %t %s +// RUN: %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-profdata show --all-functions %t.profdata | FileCheck %s -check-prefix=PROF +// RUN: llvm-cov show %t -instr-profile %t.profdata | FileCheck %s -check-prefix=COV +// RUN: llvm-nm %t | FileCheck %s -check-prefix=NM +// RUN: llvm-readelf -x __llvm_prf_names %t | FileCheck %s -check-prefix=PRF_NAMES +// RUN: llvm-readelf -x __llvm_prf_cnts %t | FileCheck %s -check-prefix=PRF_CNTS + +// RUN: %clang_lto_profgen=%t.lto.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -mllvm -enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -flto -o %t.lto %s +// RUN: %run %t.lto +// RUN: llvm-profdata merge -o %t.lto.profdata %t.lto.profraw +// RUN: llvm-profdata show --all-functions %t.lto.profdata | FileCheck %s -check-prefix=PROF +// RUN: llvm-cov show %t.lto -instr-profile %t.lto.profdata | FileCheck %s -check-prefix=COV +// RUN: llvm-nm %t.lto | FileCheck %s -check-prefix=NM +// RUN: llvm-readelf -x __llvm_prf_names %t.lto | FileCheck %s -check-prefix=PRF_NAMES +// RUN: llvm-readelf -x __llvm_prf_cnts %t.lto | FileCheck %s -check-prefix=PRF_CNTS + +// Note: We expect foo() and some of the profiling data associated with it to +// be garbage collected. + +// Note: When there is no code in a program, we expect to see the exact same +// set of external functions provided by the profile runtime. 
+ +// RUN: %clang_profgen -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -ffunction-sections -fdata-sections -Wl,--gc-sections -shared -o %t.nocode.so %s +// RUN: llvm-nm -jgU %t.nocode.so | grep -vE "__prof_orderfile_sect_data|__start_.*|__stop_.*" > %t.nocode.syms +// RUN: llvm-nm -jgU %t | grep -vE "main|_start|_IO_stdin_used|__libc_.*" > %t.code.syms +// RUN: diff %t.nocode.syms %t.code.syms + +// Note: We also check the IR instrumentation and expect foo() to be garbage +// collected as well. + +// RUN: %clang_pgogen=%t.pgo.profraw -fuse-ld=lld -mllvm -counter-link-order -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -o %t.pgo %s +// RUN: %run %t.pgo +// RUN: llvm-profdata merge -o %t.pgo.profdata %t.pgo.profraw +// RUN: llvm-profdata show --all-functions %t.pgo.profdata | FileCheck %s -check-prefix=PGO +// RUN: llvm-nm %t.pgo | FileCheck %s -check-prefix=NM + +#ifdef CODE + +// COV: [[@LINE+1]]{{ *}}|{{ *}}0|void foo() +void foo() {} + +// COV: [[@LINE+1]]{{ *}}|{{ *}}1|int main +int main() { return 0; } + +#endif // CODE + +// NM-NOT: foo + +// PROF: Counters: +// PROF-NEXT: main: +// PROF-NEXT: Hash: +// PROF-NEXT: Counters: 1 +// PROF-NEXT: Function count: 1 +// PROF-NEXT: Instrumentation level: Front-end +// PROF-NEXT: Functions shown: 1 +// PROF-NEXT: Total functions: 1 +// PROF-NEXT: Maximum function count: +// PROF-NEXT: Maximum internal block count: + +// Note: We don't expect the names of garbage collected functions to disappear +// from __llvm_prf_names, because collectPGOFuncNameStrings() glues the names +// together. + +// PRF_NAMES: Hex dump of section '__llvm_prf_names': +// PRF_NAMES-NEXT: {{.*}} 0800666f 6f016d61 696e{{.*$}} +// | | f o o # m a i n +// | |___________| +// | | +// UncompressedLen = 8 | +// | +// CompressedLen = 0 + +// Note: We expect the profile counters for garbage collected functions to also +// be garbage collected. 
+ +// PRF_CNTS: Hex dump of section '__llvm_prf_cnts': +// PRF_CNTS-NEXT: {{.*}} 00000000 00000000 {{.*$}} + +// PGO: Counters: +// PGO-NEXT: main: +// PGO-NEXT: Hash: +// PGO-NEXT: Counters: 1 +// PGO-NEXT: Instrumentation level: IR +// PGO-NEXT: Functions shown: 1 +// PGO-NEXT: Total functions: 1 +// PGO-NEXT: Maximum function count: +// PGO-NEXT: Maximum internal block count: diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -132,6 +132,9 @@ // Use BFI to guide register promotion bool UseBFIInPromotion = false; + // Use !associated metadata to enable linker GC + bool CounterLinkOrder = false; + // Name of the profile file to use as output std::string InstrProfileOutput; diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -83,6 +83,9 @@ /// Returns true if profile counter update register promotion is enabled. bool isCounterPromotionEnabled() const; + /// Returns true if the use of !associated metadata is enabled. + bool isCounterLinkOrderEnabled() const; + /// Count the number of instrumented value sites for the function. 
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -69,6 +69,11 @@ cl::desc("Enable relocating counters at runtime."), cl::init(false)); +cl::opt<bool> CounterLinkOrder( + "counter-link-order", + cl::desc("Set counter associated metadata to enable garbage collection at link time."), + cl::init(false)); + cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", cl::desc("Do static counter allocation for value profiler"), @@ -479,6 +484,13 @@ return Options.DoCounterPromotion; } +bool InstrProfiling::isCounterLinkOrderEnabled() const { + if (CounterLinkOrder.getNumOccurrences() > 0) + return CounterLinkOrder; + + return Options.CounterLinkOrder; +} + void InstrProfiling::promoteCounterLoadStores(Function *F) { if (!isCounterPromotionEnabled()) return; @@ -850,6 +862,12 @@ CounterPtr->setAlignment(Align(8)); MaybeSetComdat(CounterPtr); CounterPtr->setLinkage(Linkage); + // We need a self-link for the counter variable because the ELF section name + // (that is __llvm_prf_cnts) is a C identifier and considered a GC root in the + // absence of the SHF_LINK_ORDER flag. 
+ if (isCounterLinkOrderEnabled()) + CounterPtr->setMetadata(LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(Fn))); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for @@ -871,6 +889,10 @@ getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(Align(8)); MaybeSetComdat(ValuesVar); + if (isCounterLinkOrderEnabled()) + ValuesVar->setMetadata( + LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(CounterPtr))); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } @@ -905,6 +927,9 @@ Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); MaybeSetComdat(Data); Data->setLinkage(Linkage); + if (isCounterLinkOrderEnabled()) + Data->setMetadata(LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(CounterPtr))); PD.RegionCounters = CounterPtr; PD.DataVar = Data; diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll --- a/llvm/test/Instrumentation/InstrProfiling/icall.ll +++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -11,6 +11,7 @@ ; RUN: opt < %s -mtriple=mips64-unknown-linux -instrprof -vp-static-alloc=true -S | FileCheck %s --check-prefix=STATIC-SEXT ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -vp-static-alloc=false -instrprof -S | FileCheck %s --check-prefix=DYN ; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -vp-static-alloc=false -S | FileCheck %s --check-prefix=DYN +; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -counter-link-order -vp-static-alloc=true -S | FileCheck %s --check-prefix=METADATA @__profn_foo = private constant [3 x i8] c"foo" @@ -57,3 +58,12 @@ ; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32) ; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext) ; STATIC-SEXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 signext) + +; 
METADATA: @__profc_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8, !associated !0 +; METADATA: @__profvp_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8, !associated !1 +; METADATA: @__profc_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8, !associated !2 +; METADATA: @__profvp_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8, !associated !3 +; METADATA: !0 = !{i32 (i32 ()*)* @foo} +; METADATA: !1 = !{[1 x i64]* @__profc_foo} +; METADATA: !2 = !{i32 (i32 ()*)* @bar} +; METADATA: !3 = !{[1 x i64]* @__profc_bar} diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll --- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll @@ -8,6 +8,8 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s --check-prefixes=POSIX,LINUX ; RUN: opt < %s -mtriple=x86_64-unknown-fuchsia -passes=instrprof -S | FileCheck %s --check-prefixes=POSIX,LINUX ; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=instrprof -S | FileCheck %s --check-prefixes=COFF +; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -counter-link-order -S | FileCheck %s --check-prefixes=LINUX,POSIX,METADATA +; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -counter-link-order -S | FileCheck %s --check-prefixes=LINUX,POSIX,METADATA ; MACHO: @__llvm_profile_runtime = external global i32 ; LINUX-NOT: @__llvm_profile_runtime = external global i32 @@ -19,7 +21,9 @@ @__profn_foo_extern = linkonce_odr hidden constant [10 x i8] c"foo_extern" ; POSIX: @__profc_foo = hidden global +; METADATA-SAME: !associated !0 ; POSIX: @__profd_foo = hidden global +; METADATA-SAME: !associated !1 ; COFF: @__profc_foo = internal global ; COFF-NOT: comdat ; COFF: @__profd_foo = internal global @@ -29,7 +33,9 @@ } ; 
POSIX: @__profc_foo_weak = weak hidden global +; METADATA-SAME: !associated !2 ; POSIX: @__profd_foo_weak = weak hidden global +; METADATA-SAME: !associated !3 ; COFF: @__profc_foo_weak = internal global ; COFF: @__profd_foo_weak = internal global define weak void @foo_weak() { @@ -38,7 +44,9 @@ } ; POSIX: @"__profc_linkage.ll:foo_internal" = internal global +; METADATA-SAME: !associated !4 ; POSIX: @"__profd_linkage.ll:foo_internal" = internal global +; METADATA-SAME: !associated !5 ; COFF: @"__profc_linkage.ll:foo_internal" = internal global ; COFF: @"__profd_linkage.ll:foo_internal" = internal global define internal void @foo_internal() { @@ -47,7 +55,9 @@ } ; POSIX: @__profc_foo_inline = linkonce_odr hidden global +; METADATA-SAME: !associated !6 ; POSIX: @__profd_foo_inline = linkonce_odr hidden global +; METADATA-SAME: !associated !7 ; COFF: @__profc_foo_inline = internal global{{.*}} section ".lprfc$M", align 8 ; COFF: @__profd_foo_inline = internal global{{.*}} section ".lprfd$M", align 8 define linkonce_odr void @foo_inline() { @@ -56,7 +66,9 @@ } ; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8 +; METADATA-SAME: !associated !8 ; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat, align 8 +; METADATA-SAME: !associated !9 ; MACHO: @__profc_foo_extern = linkonce_odr hidden global ; MACHO: @__profd_foo_extern = linkonce_odr hidden global ; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat, align 8 @@ -75,3 +87,14 @@ ; COFF: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} comdat { ; LINUX-NOT: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { ; LINUX-NOT: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime + +; METADATA: !0 = !{void ()* @foo} +; METADATA: !1 = !{[1 x i64]* @__profc_foo} +; METADATA: !2 = !{void ()* @foo_weak} +; METADATA: !3 = !{[1 x i64]* 
@__profc_foo_weak} +; METADATA: !4 = !{void ()* @foo_internal} +; METADATA: !5 = !{[1 x i64]* @"__profc_linkage.ll:foo_internal"} +; METADATA: !6 = !{void ()* @foo_inline} +; METADATA: !7 = !{[1 x i64]* @__profc_foo_inline} +; METADATA: !8 = !{void ()* @foo_extern} +; METADATA: !9 = !{[1 x i64]* @__profc_foo_extern} diff --git a/llvm/test/Transforms/PGOProfile/associated.ll b/llvm/test/Transforms/PGOProfile/associated.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/associated.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -pgo-instr-gen -instrprof -counter-link-order -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -counter-link-order -S | FileCheck %s + +; CHECK: @__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8, !associated !0 +; CHECK: @__profd_foo = private global {{.*}}, section "__llvm_prf_data", align 8, !associated !1 + +define void @foo() { + ret void +} + +; CHECK: !0 = !{void ()* @foo} +; CHECK: !1 = !{[1 x i64]* @__profc_foo} diff --git a/llvm/test/Transforms/PGOProfile/counter_promo.ll b/llvm/test/Transforms/PGOProfile/counter_promo.ll --- a/llvm/test/Transforms/PGOProfile/counter_promo.ll +++ b/llvm/test/Transforms/PGOProfile/counter_promo.ll @@ -60,7 +60,7 @@ ; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}0), i64 %[[LIVEOUT1]] seq_cst ; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 %[[LIVEOUT2]] seq_cst ; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 %[[LIVEOUT3]] seq_cst -; PROMO-NOT: @__profc_foo +; PROMO-NOT: @__profc_foo{{.*}}) } diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll b/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll --- a/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll +++ b/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll @@ -69,7 +69,7 @@ ; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 4) ; PROMO-NEXT: add ; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}4) 
-; PROMO-NOT: @__profc_foo +; PROMO-NOT: @__profc_foo{{.*}}) bb15: ; preds = %bb14, %bb4