Index: compiler-rt/lib/profile/InstrProfilingFile.c
===================================================================
--- compiler-rt/lib/profile/InstrProfilingFile.c
+++ compiler-rt/lib/profile/InstrProfilingFile.c
@@ -167,6 +167,66 @@
   }
 }
 
+/// Get the size of the profile file. If there are any errors, print the
+/// message under the assumption that the profile is being read for merging
+/// purposes, and return -1. Otherwise return the file size in the inout param
+/// \p ProfileFileSize.
+static int getProfileFileSizeForMerging(FILE *ProfileFile,
+                                        uint64_t *ProfileFileSize) {
+  if (fseek(ProfileFile, 0L, SEEK_END) == -1) {
+    PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
+             strerror(errno));
+    return -1;
+  }
+  /* ftell() reports failure as -1; catch it before widening to uint64_t. */
+  long FileSize = ftell(ProfileFile);
+  if (FileSize == -1) {
+    PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
+             strerror(errno));
+    return -1;
+  }
+  *ProfileFileSize = (uint64_t)FileSize;
+
+  /* Restore file offset. */
+  if (fseek(ProfileFile, 0L, SEEK_SET) == -1) {
+    PROF_ERR("Unable to merge profile data, unable to rewind: %s\n",
+             strerror(errno));
+    return -1;
+  }
+
+  if (*ProfileFileSize > 0 &&
+      *ProfileFileSize < sizeof(__llvm_profile_header)) {
+    PROF_WARN("Unable to merge profile data: %s\n",
+              "source profile file is too small.");
+    return -1;
+  }
+  return 0;
+}
+
+/// mmap() \p ProfileFile for profile merging purposes, assuming that an
+/// exclusive lock is held on the file and that \p ProfileFileSize is the
+/// length of the file. Return the mmap'd buffer in the inout variable
+/// \p ProfileBuffer. Returns -1 on failure. On success, the caller is
+/// responsible for unmapping the mmap'd buffer in \p ProfileBuffer.
+static int mmapProfileForMerging(FILE *ProfileFile, uint64_t ProfileFileSize,
+                                 char **ProfileBuffer) {
+  *ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE,
+                        fileno(ProfileFile), 0);
+  if (*ProfileBuffer == MAP_FAILED) {
+    PROF_ERR("Unable to merge profile data, mmap failed: %s\n",
+             strerror(errno));
+    return -1;
+  }
+
+  if (__llvm_profile_check_compatibility(*ProfileBuffer, ProfileFileSize)) {
+    (void)munmap(*ProfileBuffer, ProfileFileSize);
+    PROF_WARN("Unable to merge profile data: %s\n",
+              "source profile file is not compatible.");
+    return -1;
+  }
+  return 0;
+}
+
 /* Read profile data in \c ProfileFile and merge with
  * in-memory profile counters. Returns -1 if there is fatal error,
  * otheriwse 0 is returned. Returning 0 does not mean merge is actually
@@ -176,42 +236,18 @@
   uint64_t ProfileFileSize;
   char *ProfileBuffer;
 
-  if (fseek(ProfileFile, 0L, SEEK_END) == -1) {
-    PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
-             strerror(errno));
+  /* Get the size of the profile on disk. */
+  if (getProfileFileSizeForMerging(ProfileFile, &ProfileFileSize) == -1)
     return -1;
-  }
-  ProfileFileSize = ftell(ProfileFile);
-
-  /* Restore file offset. */
-  if (fseek(ProfileFile, 0L, SEEK_SET) == -1) {
-    PROF_ERR("Unable to merge profile data, unable to rewind: %s\n",
-             strerror(errno));
-    return -1;
-  }
 
   /* Nothing to merge. */
-  if (ProfileFileSize < sizeof(__llvm_profile_header)) {
-    if (ProfileFileSize)
-      PROF_WARN("Unable to merge profile data: %s\n",
-                "source profile file is too small.");
+  if (!ProfileFileSize)
     return 0;
-  }
 
-  ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE,
-                       fileno(ProfileFile), 0);
-  if (ProfileBuffer == MAP_FAILED) {
-    PROF_ERR("Unable to merge profile data, mmap failed: %s\n",
-             strerror(errno));
+  /* mmap() the profile and check that it is compatible with the data in
+   * the current image. */
+  if (mmapProfileForMerging(ProfileFile, ProfileFileSize, &ProfileBuffer) == -1)
     return -1;
-  }
-
-  if (__llvm_profile_check_compatibility(ProfileBuffer, ProfileFileSize)) {
-    (void)munmap(ProfileBuffer, ProfileFileSize);
-    PROF_WARN("Unable to merge profile data: %s\n",
-              "source profile file is not compatible.");
-    return 0;
-  }
 
   /* Now start merging */
   __llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize);
@@ -345,11 +381,6 @@
   if (!Filename)
     return;
 
-  /* By pass file truncation to allow online raw profile
-   * merging. */
-  if (lprofCurFilename.MergePoolSize)
-    return;
-
   /* Only create the profile directory and truncate an existing profile once.
    * In continuous mode, this is necessary, as the profile is written-to by the
    * runtime initializer. */
@@ -361,6 +392,10 @@
 
   createProfileDir(Filename);
 
+  /* Bypass file truncation to allow online raw profile merging. */
+  if (lprofCurFilename.MergePoolSize)
+    return;
+
   /* Truncate the file. Later we'll reopen and append. */
   File = fopen(Filename, "w");
   if (!File)
@@ -368,6 +403,13 @@
   fclose(File);
 }
 
+#ifndef _MSC_VER
+static void assertIsZero(int *i) {
+  if (*i)
+    PROF_WARN("Expected flag to be 0, but got: %d\n", *i);
+}
+#endif
+
 static void initializeProfileForContinuousMode(void) {
 #if defined(__Fuchsia__) || defined(_WIN32)
   PROF_ERR("%s\n", "Continuous mode not yet supported on Fuchsia or Windows.");
@@ -400,28 +442,70 @@
     return;
   }
 
-  /* Open the raw profile in append mode. */
   int Length = getCurFilenameLength();
   char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
   const char *Filename = getCurFilename(FilenameBuf, 0);
   if (!Filename)
     return;
-  FILE *File = fopen(Filename, "a+b");
-  if (!File)
-    return;
+
+  FILE *File = NULL;
+  off_t CurrentFileOffset = 0;
+  off_t OffsetModPage = 0;
+
+  /* Whether an exclusive lock on the profile must be dropped after init.
+   * Use a cleanup to warn if the unlock does not occur. */
+  COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresUnlock = 0;
+
+  /* Whether a full write of an uninitialized profile is required.
+   * Use a cleanup to warn if the write does not occur. */
+  COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresFullWrite = 0;
+
+  if (!doMerging()) {
+    /* We are not merging profiles, so open the raw profile in append mode. */
+    File = fopen(Filename, "a+b");
+    if (!File)
+      return;
+
+    /* Check that the offset within the file is page-aligned. */
+    CurrentFileOffset = ftello(File);
+    OffsetModPage = CurrentFileOffset % PageSize;
+    if (OffsetModPage != 0) {
+      PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not "
+               "page-aligned. CurrentFileOffset = %lld, pagesz = %u.\n",
+               (long long)CurrentFileOffset, PageSize);
+      return;
+    }
+
+    ProfileRequiresFullWrite = 1;
+  } else {
+    /* We are merging profiles. Map the counter section as shared memory into
+     * the profile, i.e. into each participating process. An increment in one
+     * process should be visible to every other process with the same counter
+     * section mapped. */
+    File = lprofOpenFileEx(Filename);
+    if (!File)
+      return;
+
+    ProfileRequiresUnlock = 1;
+
+    uint64_t ProfileFileSize;
+    if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1)
+      goto unlockAndReturn;
+
+    if (ProfileFileSize == 0) {
+      ProfileRequiresFullWrite = 1;
+    } else {
+      /* The merged profile has a non-zero length. Check that it is compatible
+       * with the data in this process. */
+      char *ProfileBuffer;
+      if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1 ||
+          munmap(ProfileBuffer, ProfileFileSize) == -1)
+        goto unlockAndReturn;
+    }
+  }
 
   int Fileno = fileno(File);
 
-  /* Check that the offset within the file is page-aligned. */
-  off_t CurrentFileOffset = ftello(File);
-  off_t OffsetModPage = CurrentFileOffset % PageSize;
-  if (OffsetModPage != 0) {
-    PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
-             "page-aligned. CurrentFileOffset = %lld, pagesz = %u.\n",
-             CurrentFileOffset, PageSize);
-    return;
-  }
-
   /* Determine how much padding is needed before/after the counters and after
    * the names. */
   uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
@@ -438,11 +522,14 @@
 
   /* Write the partial profile. This grows the file to a point where the mmap()
    * can succeed. Leak the file handle, as the file should stay open. */
-  setProfileFile(File);
-  int rc = writeFile(Filename);
-  if (rc)
-    PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
-  setProfileFile(NULL);
+  if (ProfileRequiresFullWrite) {
+    setProfileFile(File);
+    int rc = writeFile(Filename);
+    ProfileRequiresFullWrite = 0;
+    if (rc)
+      PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
+    setProfileFile(NULL);
+  }
 
   uint64_t *CounterMmap = (uint64_t *)mmap(
       (void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
@@ -456,7 +543,13 @@
         " - FileOffsetToCounters: %llu\n",
         strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
         FileOffsetToCounters);
-    return;
+    goto unlockAndReturn;
+  }
+
+unlockAndReturn:
+  if (ProfileRequiresUnlock) {
+    lprofUnlockFileHandle(File);
+    ProfileRequiresUnlock = 0;
   }
 #endif // defined(__Fuchsia__) || defined(_WIN32)
 }
@@ -530,12 +623,6 @@
                     FilenamePat);
           return -1;
         }
-        if (MergingEnabled) {
-          PROF_WARN("%%c specifier can not be used with profile merging (%%m) "
-                    "in %s.\n",
-                    FilenamePat);
-          return -1;
-        }
 
         __llvm_profile_enable_continuous_mode();
         I++; /* advance to 'c' */
@@ -545,12 +632,6 @@
                     FilenamePat);
           return -1;
         }
-        if (__llvm_profile_is_continuous_mode_enabled()) {
-          PROF_WARN("%%c specifier can not be used with profile merging (%%m) "
-                    "in %s.\n",
-                    FilenamePat);
-          return -1;
-        }
         MergingEnabled = 1;
         if (FilenamePat[I] == 'm')
           lprofCurFilename.MergePoolSize = 1;
Index: compiler-rt/lib/profile/InstrProfilingPort.h
===================================================================
--- compiler-rt/lib/profile/InstrProfilingPort.h
+++ compiler-rt/lib/profile/InstrProfilingPort.h
@@ -22,6 +22,7 @@
 /* Need to include <stdio.h> and <io.h> */
 #define COMPILER_RT_FTRUNCATE(f,l) _chsize(_fileno(f),l)
 #define COMPILER_RT_ALWAYS_INLINE __forceinline
+#define COMPILER_RT_CLEANUP(x)
 #elif __GNUC__
 #define COMPILER_RT_ALIGNAS(x) __attribute__((aligned(x)))
 #define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
@@ -29,6 +30,7 @@
 #define COMPILER_RT_ALLOCA __builtin_alloca
 #define COMPILER_RT_FTRUNCATE(f,l) ftruncate(fileno(f),l)
 #define COMPILER_RT_ALWAYS_INLINE inline __attribute((always_inline))
+#define COMPILER_RT_CLEANUP(x) __attribute__((cleanup(x)))
 #endif
 
 #if defined(__APPLE__)
Index: compiler-rt/test/profile/ContinuousSyncMode/online-merging.c
===================================================================
--- /dev/null
+++ compiler-rt/test/profile/ContinuousSyncMode/online-merging.c
@@ -0,0 +1,138 @@
+// Test the online merging mode (%m) along with continuous mode (%c).
+//
+// Create & cd into a temporary directory.
+// RUN: rm -rf %t.dir && mkdir -p %t.dir && cd %t.dir
+//
+// Create two DSOs and a driver program that uses them.
+// RUN: echo "void dso1(void) {}" > dso1.c
+// RUN: echo "void dso2(void) {}" > dso2.c
+// RUN: %clang_pgogen -dynamiclib -o %t.dir/dso1.dylib dso1.c
+// RUN: %clang_pgogen -dynamiclib -o %t.dir/dso2.dylib dso2.c
+// RUN: %clang_pgogen -o main.exe %s %t.dir/dso1.dylib %t.dir/dso2.dylib
+//
+// === Round 1 ===
+// Test merging+continuous mode without any file contention.
+//
+// RUN: env LLVM_PROFILE_FILE="%t.dir/profdir/%m%c.profraw" %run %t.dir/main.exe nospawn
+// RUN: llvm-profdata merge -o %t.profdata %t.dir/profdir
+// RUN: llvm-profdata show --counts --all-functions %t.profdata | FileCheck %s -check-prefix=ROUND1
+
+// ROUND1-LABEL: Counters:
+// ROUND1-DAG: dso1:
+// ROUND1-DAG: Hash: 0x{{.*}}
+// ROUND1-DAG: Counters: 1
+// ROUND1-DAG: Block counts: [1]
+// ROUND1-DAG: dso2:
+// ROUND1-DAG: Hash: 0x{{.*}}
+// ROUND1-DAG: Counters: 1
+// ROUND1-DAG: Block counts: [1]
+// ROUND1-DAG: main:
+// ROUND1-DAG: Hash: 0x{{.*}}
+// ROUND1-LABEL: Instrumentation level: IR
+// ROUND1-NEXT: Functions shown: 3
+// ROUND1-NEXT: Total functions: 3
+// ROUND1-NEXT: Maximum function count: 1
+// ROUND1-NEXT: Maximum internal block count: 1
+//
+// === Round 2 ===
+// Test merging+continuous mode with some file contention.
+//
+// RUN: env LLVM_PROFILE_FILE="%t.dir/profdir/%m%c.profraw" %run %t.dir/main.exe spawn 'LLVM_PROFILE_FILE=%t.dir/profdir/%m%c.profraw'
+// RUN: llvm-profdata merge -o %t.profdata %t.dir/profdir
+// RUN: llvm-profdata show --counts --all-functions %t.profdata | FileCheck %s -check-prefix=ROUND2
+
+// ROUND2-LABEL: Counters:
+// ROUND2-DAG: dso1:
+// ROUND2-DAG: Hash: 0x{{.*}}
+// ROUND2-DAG: Counters: 1
+// ROUND2-DAG: Block counts: [97]
+// ROUND2-DAG: dso2:
+// ROUND2-DAG: Hash: 0x{{.*}}
+// ROUND2-DAG: Counters: 1
+// ROUND2-DAG: Block counts: [97]
+// ROUND2-DAG: main:
+// ROUND2-DAG: Hash: 0x{{.*}}
+// ROUND2-LABEL: Instrumentation level: IR
+// ROUND2-NEXT: Functions shown: 3
+// ROUND2-NEXT: Total functions: 3
+// ROUND2-NEXT: Maximum function count: 97
+// ROUND2-NEXT: Maximum internal block count: 33
+
+#include <spawn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+const int num_child_procs_to_spawn = 32;
+
+extern int __llvm_profile_is_continuous_mode_enabled(void);
+extern char *__llvm_profile_get_filename(void);
+
+void dso1(void);
+void dso2(void);
+
+// Change to "#define" for debug output.
+#undef DEBUG_TEST
+
+#ifdef DEBUG_TEST
+#define DEBUG(...) fprintf(stderr, __VA_ARGS__);
+#else
+#define DEBUG(...)
+#endif
+
+int main(int argc, char *const argv[]) {
+  if (strcmp(argv[1], "nospawn") == 0) {
+    DEBUG("Hello from child (pid = %d, cont-mode-enabled = %d, profile = %s).\n",
+          getpid(), __llvm_profile_is_continuous_mode_enabled(), __llvm_profile_get_filename());
+
+    dso1();
+    dso2();
+    return 0;
+  } else if (strcmp(argv[1], "spawn") == 0) {
+    // This is the start of Round 2.
+    // Expect Counts[dsoX] = 1, as this was the state at the end of Round 1.
+
+    int I;
+    pid_t child_pids[num_child_procs_to_spawn];
+    char *const child_argv[] = {argv[0], "nospawn", NULL};
+    char *const child_envp[] = {argv[2], NULL};
+    for (I = 0; I < num_child_procs_to_spawn; ++I) {
+      dso1(); // Counts[dsoX] += 2 * num_child_procs_to_spawn
+      dso2();
+
+      DEBUG("Spawning child with argv = {%s, %s, NULL} and envp = {%s, NULL}\n",
+            child_argv[0], child_argv[1], child_envp[0]);
+
+      int ret = posix_spawn(&child_pids[I], argv[0], NULL, NULL, child_argv,
+                            child_envp);
+      if (ret != 0) {
+        fprintf(stderr, "Child %d could not be spawned: ret = %d, msg = %s\n",
+                I, ret, strerror(ret));
+        return 1;
+      }
+
+      DEBUG("Spawned child %d (pid = %d).\n", I, child_pids[I]);
+    }
+    for (I = 0; I < num_child_procs_to_spawn; ++I) {
+      dso1(); // Counts[dsoX] += num_child_procs_to_spawn
+      dso2();
+
+      int status;
+      waitpid(child_pids[I], &status, 0);
+      if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+        fprintf(stderr, "Child %d did not exit with code 0.\n", I);
+        return 1;
+      }
+    }
+
+    // At the end of Round 2, we have:
+    // Counts[dsoX] = 1 + (2 * num_child_procs_to_spawn) + num_child_procs_to_spawn
+    //              = 97
+
+    return 0;
+  }
+
+  return 1;
+}