Index: compiler-rt/lib/profile/InstrProfilingPlatformLinux.c =================================================================== --- compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -37,14 +37,22 @@ extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY; /* Add dummy data to ensure the section is always created. */ +#if defined(__ELF__) +__asm__( + ".pushsection " INSTR_PROF_DATA_SECT_NAME ",\"ao\",@progbits,.text\n\t" + ".popsection\n" + ".pushsection " INSTR_PROF_CNTS_SECT_NAME ",\"ao\",@progbits,.text\n\t" + ".popsection\n"); +#else __llvm_profile_data __prof_data_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_DATA_SECT_NAME); uint64_t __prof_cnts_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_CNTS_SECT_NAME); +#endif +ValueProfNode __prof_vnodes_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_VNODES_SECT_NAME); uint32_t __prof_orderfile_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_ORDERFILE_SECT_NAME); char __prof_nms_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_NAME_SECT_NAME); -ValueProfNode __prof_vnodes_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_VNODES_SECT_NAME); COMPILER_RT_VISIBILITY const __llvm_profile_data * __llvm_profile_begin_data(void) { Index: compiler-rt/test/profile/instrprof-gc-sections.c =================================================================== --- /dev/null +++ compiler-rt/test/profile/instrprof-gc-sections.c @@ -0,0 +1,72 @@ +// REQUIRES: linux, lld-available + +// RUN: %clang_profgen=%t.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -o %t %s +// RUN: %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-profdata show --all-functions %t.profdata | FileCheck %s -check-prefix=PROF +// RUN: llvm-cov show %t -instr-profile %t.profdata | FileCheck %s -check-prefix=COV +// RUN: llvm-nm %t | FileCheck %s -check-prefix=NM +// RUN: llvm-readelf -x __llvm_prf_names %t | FileCheck %s -check-prefix=PRF_NAMES +// RUN: llvm-readelf -x __llvm_prf_cnts %t | FileCheck %s -check-prefix=PRF_CNTS + +// RUN: %clang_lto_profgen=%t.lto.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -flto -o %t.lto %s +// RUN: %run %t.lto +// RUN: llvm-profdata merge -o %t.lto.profdata %t.lto.profraw +// RUN: llvm-profdata show --all-functions %t.lto.profdata | FileCheck %s -check-prefix=PROF +// RUN: llvm-cov show %t.lto -instr-profile %t.lto.profdata | FileCheck %s -check-prefix=COV +// RUN: llvm-nm %t.lto | FileCheck %s -check-prefix=NM +// RUN: llvm-readelf -x __llvm_prf_names %t.lto | FileCheck %s -check-prefix=PRF_NAMES +// RUN: llvm-readelf -x __llvm_prf_cnts %t.lto | FileCheck %s -check-prefix=PRF_CNTS + +// Note: We expect foo() and some of the profiling data associated with it to +// be dead-stripped. + +// Note: When there is no code in a program, we expect to see the exact same +// set of external functions provided by the profile runtime. + +// RUN: %clang_profgen -fcoverage-mapping -ffunction-sections -fdata-sections -Wl,--gc-sections -shared -o %t.nocode.so %s +// RUN: llvm-nm -jgU %t.nocode.so | grep -vE "__start_.*|__stop_.*" > %t.nocode.syms +// RUN: llvm-nm -jgU %t | grep -vE "main|foo|_start|__libc_.*" > %t.code.syms +// RUN: diff %t.nocode.syms %t.code.syms + +#ifdef CODE + +// COV: [[@LINE+1]]{{ *}}|{{ *}}0|void foo() +void foo() {} + +// COV: [[@LINE+1]]{{ *}}|{{ *}}1|int main +int main() { return 0; } + +#endif // CODE + +// NM-NOT: foo + +// PROF: Counters: +// PROF-NEXT: main: +// PROF-NEXT: Hash: +// PROF-NEXT: Counters: 1 +// PROF-NEXT: Function count: 1 +// PROF-NEXT: Instrumentation level: Front-end +// PROF-NEXT: Functions shown: 1 +// PROF-NEXT: Total functions: 1 +// PROF-NEXT: Maximum function count: 1 +// PROF-NEXT: Maximum internal block count: 0 + +// Note: We don't expect the names of garbage collected functions to disappear +// from __llvm_prf_names, because collectPGOFuncNameStrings() glues the names +// together. + +// PRF_NAMES: Hex dump of section '__llvm_prf_names': +// PRF_NAMES-NEXT: {{.*}} 0800666f 6f016d61 696e {{.*$}} +// | | f o o # m a i n +// | |___________| +// | | +// UncompressedLen = 8 | +// | +// CompressedLen = 0 + +// Note: We expect the profile counters for garbage collected functions to also be +// garbage collected. + +// PRF_CNTS: Hex dump of section '__llvm_prf_cnts': +// PRF_CNTS-NEXT: {{.*}} 00000000 00000000 {{.*$}} Index: llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -844,6 +844,8 @@ CounterPtr->setAlignment(Align(8)); MaybeSetComdat(CounterPtr); CounterPtr->setLinkage(Linkage); + CounterPtr->setMetadata(LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(Fn))); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for @@ -865,6 +867,8 @@ getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(Align(8)); MaybeSetComdat(ValuesVar); + ValuesVar->setMetadata(LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(Fn))); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } @@ -899,6 +903,8 @@ Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); MaybeSetComdat(Data); Data->setLinkage(Linkage); + Data->setMetadata(LLVMContext::MD_associated, + MDNode::get(Ctx, ValueAsMetadata::get(Fn))); PD.RegionCounters = CounterPtr; PD.DataVar = Data; Index: llvm/test/Instrumentation/InstrProfiling/associated.ll =================================================================== --- /dev/null +++ llvm/test/Instrumentation/InstrProfiling/associated.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@__profn_foo = hidden constant [3 x i8] c"foo" + +; CHECK: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8, !associated !0 +; CHECK: @__profd_foo = hidden global {{.*}}, section "__llvm_prf_data", align 8, !associated !0 + +define void @foo() { + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0) + ret void +} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) + +; CHECK: !0 = !{void ()* @foo}