diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst --- a/clang/docs/SourceBasedCodeCoverage.rst +++ b/clang/docs/SourceBasedCodeCoverage.rst @@ -92,15 +92,42 @@ instrumented program crashes, or is killed by a signal, perfect coverage information can still be recovered. Continuous mode does not support value profiling for PGO, and is only supported on Darwin at the moment. Support for - Linux may be mostly complete but requires testing, and support for - Fuchsia/Windows may require more extensive changes: please get involved if - you are interested in porting this feature. + Linux may be mostly complete but requires testing, and support for Windows + may require more extensive changes: please get involved if you are interested + in porting this feature. .. code-block:: console # Step 2: Run the program. % LLVM_PROFILE_FILE="foo.profraw" ./foo +Note that continuous mode is also used on Fuchsia where it's the only supported +mode, but the implementation is different. The Darwin and Linux implementation +relies on padding and the ability to map a file over the existing memory +mapping which is generally only available on POSIX systems and isn't suitable +for other platforms. + +On Fuchsia, we rely on the ability to relocate counters at runtime using a +level of indirection. On every counter access, we add a bias to the counter +address. This bias is stored in ``__llvm_profile_counter_bias`` symbol that's +provided by the profile runtime and is initially set to zero, meaning no +relocation. The runtime can map the profile into memory at arbitrary location, +and set bias to the offset between the original and the new counter location, +at which point every subsequent counter access will be to the new location, +which allows updating profile directly akin to the continuous mode. + +The advantage of this approach is that it doesn't require any special OS support. 
+The disadvantage is the extra overhead due to additional instructions required +for each counter access (overhead both in terms of binary size and performance) +plus duplication of counters (i.e. one copy in the binary itself and another +copy that's mapped into memory). This implementation can also be enabled for +other platforms by passing the ``-runtime-counter-relocation`` option to the +backend during compilation. + +.. code-block:: console + + % clang++ -fprofile-instr-generate -fcoverage-mapping -mllvm -runtime-counter-relocation foo.cc -o foo + Creating coverage reports ========================= diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1149,6 +1149,7 @@ } else { addExportedSymbol(CmdArgs, "___llvm_profile_filename"); addExportedSymbol(CmdArgs, "___llvm_profile_raw_version"); + addExportedSymbol(CmdArgs, "___llvm_profile_counter_bias"); } addExportedSymbol(CmdArgs, "_lprofDirMode"); } diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -52,6 +52,7 @@ GCDAProfiling.c InstrProfiling.c InstrProfilingValue.c + InstrProfilingBiasVar.c InstrProfilingBuffer.c InstrProfilingFile.c InstrProfilingMerge.c diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -307,4 +307,11 @@ */ extern char INSTR_PROF_PROFILE_NAME_VAR[1]; /* __llvm_profile_filename. */ +/*! + * This variable is a weak symbol defined in InstrProfilingBiasVar.c. It + * allows instrumented code to provide an overriding definition; the runtime + * detects it to enable counter relocation. This variable has hidden visibility. 
+ */ +COMPILER_RT_VISIBILITY extern intptr_t __llvm_profile_counter_bias; + #endif /* PROFILE_INSTRPROFILING_H_ */ diff --git a/compiler-rt/lib/profile/InstrProfilingBiasVar.c b/compiler-rt/lib/profile/InstrProfilingBiasVar.c new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingBiasVar.c @@ -0,0 +1,15 @@ +/*===- InstrProfilingBiasVar.c - profile counter bias variable setup ------===*\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ + +#include "InstrProfiling.h" + +/* The runtime should only provide its own definition of this symbol when the + * user has not specified one. Set this up by moving the runtime's copy of this + * symbol to an object file within the archive. + */ +COMPILER_RT_WEAK intptr_t __llvm_profile_counter_bias = -1; diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -10,6 +10,9 @@ #include "InstrProfilingInternal.h" #include "InstrProfilingPort.h" +/* When counters are being relocated at runtime, this parameter is set to 1. */ +COMPILER_RT_VISIBILITY int RuntimeCounterRelocation = 0; + /* When continuous mode is enabled (%c), this parameter is set to 1. 
* * This parameter is defined here in InstrProfilingBuffer.o, instead of in @@ -62,7 +65,8 @@ uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterNames) { - if (!__llvm_profile_is_continuous_mode_enabled()) { + if (!__llvm_profile_is_continuous_mode_enabled() || + RuntimeCounterRelocation) { *PaddingBytesBeforeCounters = 0; *PaddingBytesAfterCounters = 0; *PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize); diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -448,6 +448,98 @@ } #endif // !defined(__Fuchsia__) && !defined(_WIN32) +static int writeMMappedFile(FILE *OutputFile, char **Profile) { + if (!OutputFile) + return -1; + + /* Write the data into a file. */ + setupIOBuffer(); + ProfDataWriter fileWriter; + initFileWriter(&fileWriter, OutputFile); + if (lprofWriteData(&fileWriter, NULL, 0)) { + PROF_ERR("Failed to write profile: %s\n", strerror(errno)); + return -1; + } + fflush(OutputFile); + + /* Get the file size. */ + uint64_t FileSize = ftell(OutputFile); + + /* Map the profile. */ + *Profile = (char *)mmap( + NULL, FileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(OutputFile), 0); + if (*Profile == MAP_FAILED) { + PROF_ERR("Unable to mmap profile: %s\n", strerror(errno)); + return -1; + } + + return 0; +} + +static void relocateCounters(void) { + if (!__llvm_profile_is_continuous_mode_enabled() || !RuntimeCounterRelocation) + return; + + /* Get the sizes of various profile data sections. Taken from + * __llvm_profile_get_size_for_buffer(). 
*/ + const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); + const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); + uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); + const uint64_t CountersOffset = sizeof(__llvm_profile_header) + + (DataSize * sizeof(__llvm_profile_data)); + + int Length = getCurFilenameLength(); + char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); + const char *Filename = getCurFilename(FilenameBuf, 0); + if (!Filename) + return; + + FILE *File = NULL; + char *Profile = NULL; + + if (!doMerging()) { + File = fopen(Filename, "w+b"); + if (!File) + return; + + if (writeMMappedFile(File, &Profile) == -1) { + fclose(File); + return; + } + } else { + File = lprofOpenFileEx(Filename); + if (!File) + return; + + uint64_t ProfileFileSize = 0; + if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) { + lprofUnlockFileHandle(File); + fclose(File); + return; + } + + if (!ProfileFileSize) { + if (writeMMappedFile(File, &Profile) == -1) { + fclose(File); + return; + } + } else { + /* The merged profile has a non-zero length. Check that it is compatible + * with the data in this process. */ + if (mmapProfileForMerging(File, ProfileFileSize, &Profile) == -1) { + fclose(File); + return; + } + } + + lprofUnlockFileHandle(File); + } + + /* Update the profile fields based on the current mapping. 
*/ + __llvm_profile_counter_bias = (intptr_t)Profile - + (uintptr_t)__llvm_profile_begin_counters() + CountersOffset; +} + static void initializeProfileForContinuousMode(void) { if (!__llvm_profile_is_continuous_mode_enabled()) return; @@ -715,7 +807,12 @@ } truncateCurrentFile(); - initializeProfileForContinuousMode(); + if (__llvm_profile_is_continuous_mode_enabled()) { + if (RuntimeCounterRelocation) + relocateCounters(); + else + initializeProfileForContinuousMode(); + } } /* Return buffer length that is required to store the current profile @@ -865,6 +962,9 @@ ProfileNameSpecifier PNS = PNS_unknown; int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0); + if (__llvm_profile_counter_bias != -1) + RuntimeCounterRelocation = 1; + EnvFilenamePat = getFilenamePatFromEnv(); if (EnvFilenamePat) { /* Pass CopyFilenamePat = 1, to ensure that the filename would be valid diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h --- a/compiler-rt/lib/profile/InstrProfilingInternal.h +++ b/compiler-rt/lib/profile/InstrProfilingInternal.h @@ -184,6 +184,7 @@ unsigned lprofProfileDumped(); void lprofSetProfileDumped(); +COMPILER_RT_VISIBILITY extern int RuntimeCounterRelocation; COMPILER_RT_VISIBILITY extern void (*FreeHook)(void *); COMPILER_RT_VISIBILITY extern uint8_t *DynamicBufferIOBuffer; COMPILER_RT_VISIBILITY extern uint32_t VPBufferSize; diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c --- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c @@ -34,16 +34,10 @@ #include "InstrProfilingInternal.h" #include "InstrProfilingUtil.h" -/* VMO that contains the coverage data shared across all modules. This symbol - * has default visibility and is exported in each module (executable or DSO) - * that statically links in the profiling runtime. 
- */ -zx_handle_t __llvm_profile_vmo; -/* Current offset within the VMO where data should be written next. This symbol - * has default visibility and is exported in each module (executable or DSO) - * that statically links in the profiling runtime. - */ -uint64_t __llvm_profile_offset; +/* VMO that contains the coverage data shared across all modules. */ +static zx_handle_t __llvm_profile_vmo; +/* Current offset within the VMO where data should be written next. */ +static uint64_t __llvm_profile_offset; static const char ProfileSinkName[] = "llvm-profile"; @@ -58,55 +52,6 @@ __sanitizer_log_write(s, ret + 1); } -static void createVMO() { - /* Don't create VMO if it has been alread created. */ - if (__llvm_profile_vmo != ZX_HANDLE_INVALID) - return; - - /* Get information about the current process. */ - zx_info_handle_basic_t Info; - zx_status_t Status = - _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info, - sizeof(Info), NULL, NULL); - if (Status != ZX_OK) { - lprofWrite("LLVM Profile: cannot get info about current process: %s\n", - _zx_status_get_string(Status)); - return; - } - - /* Create VMO to hold the profile data. */ - Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &__llvm_profile_vmo); - if (Status != ZX_OK) { - lprofWrite("LLVM Profile: cannot create VMO: %s\n", - _zx_status_get_string(Status)); - return; - } - - /* Give the VMO a name including our process KOID so it's easy to spot. */ - char VmoName[ZX_MAX_NAME_LEN]; - snprintf(VmoName, sizeof(VmoName), "%s.%" PRIu64, ProfileSinkName, Info.koid); - _zx_object_set_property(__llvm_profile_vmo, ZX_PROP_NAME, VmoName, - strlen(VmoName)); - - /* Duplicate the handle since __sanitizer_publish_data consumes it. 
*/ - zx_handle_t Handle; - Status = - _zx_handle_duplicate(__llvm_profile_vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); - if (Status != ZX_OK) { - lprofWrite("LLVM Profile: cannot duplicate VMO handle: %s\n", - _zx_status_get_string(Status)); - _zx_handle_close(__llvm_profile_vmo); - __llvm_profile_vmo = ZX_HANDLE_INVALID; - return; - } - - /* Publish the VMO which contains profile data to the system. */ - __sanitizer_publish_data(ProfileSinkName, Handle); - - /* Use the dumpfile symbolizer markup element to write the name of VMO. */ - lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName); -} - static uint32_t lprofVMOWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs, uint32_t NumIOVecs) { /* Compute the total length of data to be written. */ @@ -175,6 +120,92 @@ static void dumpWithoutReturn(void) { dump(); } +static void createVMO(void) { + /* Don't create VMO if it has been already created. */ + if (__llvm_profile_vmo != ZX_HANDLE_INVALID) + return; + + const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); + const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); + const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); + const uint64_t CountersOffset = sizeof(__llvm_profile_header) + + (DataSize * sizeof(__llvm_profile_data)); + + zx_status_t Status; + + /* Create VMO to hold the profile data. */ + Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &__llvm_profile_vmo); + if (Status != ZX_OK) { + lprofWrite("LLVM Profile: cannot create VMO: %s\n", + _zx_status_get_string(Status)); + return; + } + + /* Give the VMO a name that includes the module signature. */ + char VmoName[ZX_MAX_NAME_LEN]; + snprintf(VmoName, sizeof(VmoName), "%" PRIu64 ".profraw", + lprofGetLoadModuleSignature()); + _zx_object_set_property(__llvm_profile_vmo, ZX_PROP_NAME, VmoName, + strlen(VmoName)); + + /* Duplicate the handle since __sanitizer_publish_data consumes it. 
*/ + zx_handle_t Handle; + Status = + _zx_handle_duplicate(__llvm_profile_vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); + if (Status != ZX_OK) { + lprofWrite("LLVM Profile: cannot duplicate VMO handle: %s\n", + _zx_status_get_string(Status)); + _zx_handle_close(__llvm_profile_vmo); + __llvm_profile_vmo = ZX_HANDLE_INVALID; + return; + } + + /* Publish the VMO which contains profile data to the system. */ + __sanitizer_publish_data(ProfileSinkName, Handle); + + /* Use the dumpfile symbolizer markup element to write the name of VMO. */ + lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName); + + /* Check if there is llvm/runtime version mismatch. */ + if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { + lprofWrite("LLVM Profile: runtime and instrumentation version mismatch: " + "expected %d, but got %d\n", + INSTR_PROF_RAW_VERSION, + (int)GET_VERSION(__llvm_profile_get_version())); + return; + } + + /* Write the profile data into the mapped region. */ + ProfDataWriter VMOWriter; + initVMOWriter(&VMOWriter); + if (lprofWriteData(&VMOWriter, 0, 0) != 0) { + lprofWrite("LLVM Profile: failed to write data\n"); + return; + } + + uint64_t Len = 0; + Status = _zx_vmo_get_size(__llvm_profile_vmo, &Len); + if (Status != ZX_OK) { + lprofWrite("LLVM Profile: failed to get the VMO size: %s\n", + _zx_status_get_string(Status)); + return; + } + + uintptr_t Mapping; + Status = + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, + 0, __llvm_profile_vmo, 0, Len, &Mapping); + if (Status != ZX_OK) { + lprofWrite("LLVM Profile: failed to map the VMO: %s\n", + _zx_status_get_string(Status)); + return; + } + + /* Update the profile fields based on the current mapping. */ + __llvm_profile_counter_bias = (intptr_t)Mapping - + (uintptr_t)__llvm_profile_begin_counters() + CountersOffset; +} + /* This method is invoked by the runtime initialization hook * InstrProfilingRuntime.o if it is linked in. 
*/ diff --git a/compiler-rt/test/profile/ContinuousSyncMode/basic.c b/compiler-rt/test/profile/ContinuousSyncMode/basic.c --- a/compiler-rt/test/profile/ContinuousSyncMode/basic.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/basic.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // RUN: %clang -fprofile-instr-generate -fcoverage-mapping -o %t.exe %s // RUN: echo "garbage" > %t.profraw // RUN: env LLVM_PROFILE_FILE="%c%t.profraw" %run %t.exe diff --git a/compiler-rt/test/profile/ContinuousSyncMode/lit.local.cfg.py b/compiler-rt/test/profile/ContinuousSyncMode/lit.local.cfg.py deleted file mode 100644 --- a/compiler-rt/test/profile/ContinuousSyncMode/lit.local.cfg.py +++ /dev/null @@ -1,18 +0,0 @@ -import subprocess - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -# As this has not been tested extensively on non-Darwin platforms, -# only Darwin support is enabled for the moment. However, continuous mode -# may "just work" without modification on Linux and other UNIX-likes (AIUI -# the default value for the GNU linker's `--section-alignment` flag is -# 0x1000, which is the size of a page on many systems). -# -# Please add supported configs to this list. 
-if root.host_os not in ['Darwin']: - config.unsupported = True diff --git a/compiler-rt/test/profile/ContinuousSyncMode/multiple-DSOs.c b/compiler-rt/test/profile/ContinuousSyncMode/multiple-DSOs.c --- a/compiler-rt/test/profile/ContinuousSyncMode/multiple-DSOs.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/multiple-DSOs.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // RUN: echo "void dso1(void) {}" > %t.dso1.c // RUN: echo "void dso2(void) {}" > %t.dso2.c // RUN: %clang_pgogen -dynamiclib -o %t.dso1.dylib %t.dso1.c diff --git a/compiler-rt/test/profile/ContinuousSyncMode/online-merging.c b/compiler-rt/test/profile/ContinuousSyncMode/online-merging.c --- a/compiler-rt/test/profile/ContinuousSyncMode/online-merging.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/online-merging.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // Test the online merging mode (%m) along with continuous mode (%c). // // Create & cd into a temporary directory. diff --git a/compiler-rt/test/profile/ContinuousSyncMode/pid-substitution.c b/compiler-rt/test/profile/ContinuousSyncMode/pid-substitution.c --- a/compiler-rt/test/profile/ContinuousSyncMode/pid-substitution.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/pid-substitution.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // RUN: rm -rf %t.dir && mkdir -p %t.dir // RUN: %clang_pgogen -o %t.exe %s // diff --git a/compiler-rt/test/profile/ContinuousSyncMode/basic.c b/compiler-rt/test/profile/ContinuousSyncMode/runtime-counter-relocation.c copy from compiler-rt/test/profile/ContinuousSyncMode/basic.c copy to compiler-rt/test/profile/ContinuousSyncMode/runtime-counter-relocation.c --- a/compiler-rt/test/profile/ContinuousSyncMode/basic.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/runtime-counter-relocation.c @@ -1,4 +1,6 @@ -// RUN: %clang -fprofile-instr-generate -fcoverage-mapping -o %t.exe %s +// REQUIRES: linux + +// RUN: %clang -fprofile-instr-generate -fcoverage-mapping -mllvm -runtime-counter-relocation=true -o %t.exe %s // RUN: 
echo "garbage" > %t.profraw // RUN: env LLVM_PROFILE_FILE="%c%t.profraw" %run %t.exe // RUN: llvm-profdata show --counts --all-functions %t.profraw | FileCheck %s -check-prefix=CHECK-COUNTS @@ -17,11 +19,11 @@ // CHECK-COUNTS-NEXT: Maximum function count: 1 // CHECK-COUNTS-NEXT: Maximum internal block count: 1 -// CHECK-COVERAGE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover +// CHECK-COVERAGE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover // CHECK-COVERAGE-NEXT: --- -// CHECK-COVERAGE-NEXT: basic.c 4 1 75.00% 1 0 100.00% 5 2 60.00% +// CHECK-COVERAGE-NEXT: runtime-counter-relocation.c 4 1 75.00% 1 0 100.00% 5 2 60.00% // CHECK-COVERAGE-NEXT: --- -// CHECK-COVERAGE-NEXT: TOTAL 4 1 75.00% 1 0 100.00% 5 2 60.00% +// CHECK-COVERAGE-NEXT: TOTAL 4 1 75.00% 1 0 100.00% 5 2 60.00% extern int __llvm_profile_is_continuous_mode_enabled(void); diff --git a/compiler-rt/test/profile/ContinuousSyncMode/set-file-object.c b/compiler-rt/test/profile/ContinuousSyncMode/set-file-object.c --- a/compiler-rt/test/profile/ContinuousSyncMode/set-file-object.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/set-file-object.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // RUN: %clang_pgogen -o %t.exe %s // RUN: env LLVM_PROFILE_FILE="%c%t.profraw" %run %t.exe %t.bad 2>&1 | FileCheck %s diff --git a/compiler-rt/test/profile/ContinuousSyncMode/set-filename.c b/compiler-rt/test/profile/ContinuousSyncMode/set-filename.c --- a/compiler-rt/test/profile/ContinuousSyncMode/set-filename.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/set-filename.c @@ -1,3 +1,5 @@ +// REQUIRES: darwin + // RUN: %clang_pgogen -o %t.exe %s // RUN: env LLVM_PROFILE_FILE="%c%t.profraw" %run %t.exe %t.profraw %t.bad diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ 
-153,6 +153,10 @@ return "__llvm_profile_runtime_user"; } +inline StringRef getInstrProfCounterBiasVarName() { + return "__llvm_profile_counter_bias"; +} + /// Return the marker used to separate PGO names during serialization. inline StringRef getInstrProfNameSeparator() { return "\01"; } diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -82,6 +82,9 @@ /// Register-promote counter loads and stores in loops. void promoteCounterLoadStores(Function *F); + /// Returns true if relocating counters at runtime is enabled. + bool isRuntimeCounterRelocationEnabled() const; + /// Returns true if profile counter update register promotion is enabled. bool isCounterPromotionEnabled() const; diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -83,6 +83,11 @@ cl::desc("Rename counter variable of a comdat function based on cfg hash"), cl::init(true)); +cl::opt<bool> RuntimeCounterRelocation( + "runtime-counter-relocation", + cl::desc("Enable relocating counters at runtime."), + cl::init(false)); + cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", cl::desc("Do static counter allocation for value profiler"), @@ -431,6 +436,13 @@ return true; } +bool InstrProfiling::isRuntimeCounterRelocationEnabled() const { + if (RuntimeCounterRelocation.getNumOccurrences() > 0) + return RuntimeCounterRelocation; + + return TT.isOSFuchsia(); +} + bool InstrProfiling::isCounterPromotionEnabled() const { if (DoCounterPromotion.getNumOccurrences() > 0) return DoCounterPromotion; @@ -641,6 +653,26 @@ Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), Counters, 0, 
Index); + if (isRuntimeCounterRelocationEnabled()) { + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext()); + GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); + if (!Bias) + Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, + Constant::getNullValue(Int64Ty), + getInstrProfCounterBiasVarName()); + Function *Fn = Inc->getParent()->getParent(); + Instruction &I = Fn->getEntryBlock().front(); + // Reuse the entry-block load only if it really is the bias load. + LoadInst *LI = dyn_cast<LoadInst>(&I); + if (!LI || LI->getPointerOperand() != Bias) { + IRBuilder<> EntryBuilder(&I); + LI = EntryBuilder.CreateLoad(Int64Ty, Bias); + } + auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); + Addr = Builder.CreateIntToPtr(Add, Int64PtrTy); + } + if (Options.Atomic || AtomicCounterUpdateAll) { Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), AtomicOrdering::Monotonic); diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -S -instrprof | FileCheck %s +; RUN: opt < %s -S -instrprof -runtime-counter-relocation | FileCheck -check-prefixes=RELOC %s + +target triple = "x86_64-unknown-linux-gnu" + +@__profn_foo = hidden constant [3 x i8] c"foo" +; RELOC: @__llvm_profile_counter_bias = linkonce_odr global i64 0 + +; CHECK-LABEL: define void @foo +; CHECK-NEXT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0) +; CHECK-NEXT: %1 = add i64 %pgocount, 1 +; CHECK-NEXT: store i64 %1, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0) +; RELOC-LABEL: define void @foo +; RELOC-NEXT: %1 = load i64, i64* @__llvm_profile_counter_bias +; RELOC-NEXT: %2 = add i64 ptrtoint ([1 x i64]* 
@__profc_foo to i64), %1 +; RELOC-NEXT: %3 = inttoptr i64 %2 to i64* +; RELOC-NEXT: %pgocount = load i64, i64* %3 +; RELOC-NEXT: %4 = add i64 %pgocount, 1 +; RELOC-NEXT: store i64 %4, i64* %3 +define void @foo() { + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0) + ret void +} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32)