diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ ${OUTDIR}/memprof.memprofraw diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp --- a/clang/test/CodeGen/memprof.cpp +++ b/clang/test/CodeGen/memprof.cpp @@ -11,16 +11,7 @@ // TODO: Use text profile inputs once that is available for memprof. // -// The following commands were used to compile the source to instrumented -// executables and collect raw binary format profiles: -// -// # Collect memory profile: -// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ -// -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ -// -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ -// memprof.cpp -o memprof.exe -fmemory-profile -// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw -// +// To update the inputs below, run Inputs/update_memprof_inputs.sh // RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata // Profile use: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ ${OUTDIR}/memprof.cc << EOF +#include +#include +#include +char *foo() { + return new char[10]; +} +char *foo2() { + return foo(); +} +char *bar() { + return foo2(); +} +char *baz() { + return foo2(); +} +char *recurse(unsigned n) { + if (!n) + return foo(); + return recurse(n-1); +} +int main(int argc, char **argv) { + // Test allocations with different combinations of stack contexts and + // coldness (based on lifetime, since they are all accessed a single time + // per byte via the memset). 
+ char *a = new char[10]; + char *b = new char[10]; + char *c = foo(); + char *d = foo(); + char *e = bar(); + char *f = baz(); + memset(a, 0, 10); + memset(b, 0, 10); + memset(c, 0, 10); + memset(d, 0, 10); + memset(e, 0, 10); + memset(f, 0, 10); + // a and c have short lifetimes + delete[] a; + delete[] c; + // b, d, e, and f have long lifetimes and will be detected as cold by default. + sleep(200); + delete[] b; + delete[] d; + delete[] e; + delete[] f; + + // Loop ensures the two calls to recurse have stack contexts that only differ + // in one level of recursion. We should get two stack contexts reflecting the + // different levels of recursion and different allocation behavior (since the + // first has a very long lifetime and the second has a short lifetime). + for (unsigned i = 0; i < 2; i++) { + char *g = recurse(i + 3); + memset(g, 0, 10); + if (!i) + sleep(200); + delete[] g; + } + return 0; +} +EOF + +COMMON_FLAGS="-fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie" + +${CLANG} ${COMMON_FLAGS} -fmemory-profile ${OUTDIR}/memprof.cc -o ${OUTDIR}/memprof.exe +env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof.exe > ${OUTDIR}/memprof.memprofraw + +${CLANG} ${COMMON_FLAGS} -fprofile-generate=. \ + ${OUTDIR}/memprof.cc -o ${OUTDIR}/pgo.exe +env LLVM_PROFILE_FILE=${OUTDIR}/memprof_pgo.profraw ${OUTDIR}/pgo.exe + +rm ${OUTDIR}/memprof.cc +rm ${OUTDIR}/pgo.exe diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -7,87 +7,8 @@ ; REQUIRES: x86_64-linux ;; TODO: Use text profile inputs once that is available for memprof. 
- -;; The input IR and raw profiles have been generated from the following source: -;; -;; #include <stdlib.h> -;; #include <string.h> -;; #include <unistd.h> -;; char *foo() { -;; return new char[10]; -;; } -;; char *foo2() { -;; return foo(); -;; } -;; char *bar() { -;; return foo2(); -;; } -;; char *baz() { -;; return foo2(); -;; } -;; char *recurse(unsigned n) { -;; if (!n) -;; return foo(); -;; return recurse(n-1); -;; } -;; int main(int argc, char **argv) { -;; // Test allocations with different combinations of stack contexts and -;; // coldness (based on lifetime, since they are all accessed a single time -;; // per byte via the memset). -;; char *a = new char[10]; -;; char *b = new char[10]; -;; char *c = foo(); -;; char *d = foo(); -;; char *e = bar(); -;; char *f = baz(); -;; memset(a, 0, 10); -;; memset(b, 0, 10); -;; memset(c, 0, 10); -;; memset(d, 0, 10); -;; memset(e, 0, 10); -;; memset(f, 0, 10); -;; // a and c have short lifetimes -;; delete[] a; -;; delete[] c; -;; // b, d, e, and f have long lifetimes and will be detected as cold by default. -;; sleep(200); -;; delete[] b; -;; delete[] d; -;; delete[] e; -;; delete[] f; -;; // Loop ensures the two calls to recurse have stack contexts that only differ -;; // in one level of recursion. We should get two stack contexts reflecting the -;; // different levels of recursion and different allocation behavior (since the -;; // first has a very long lifetime and the second has a short lifetime). 
-;; for (unsigned i = 0; i < 2; i++) { -;; char *g = recurse(i + 3); -;; memset(g, 0, 10); -;; if (!i) -;; sleep(200); -;; delete[] g; -;; } -;; return 0; -;; } -;; -;; The following commands were used to compile the source to instrumented -;; executables and collect raw binary format profiles: -;; -;; # Collect memory profile: -;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ -;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ -;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ -;; memprof.cc -o memprof.exe -fmemory-profile -;; $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw -;; -;; # Collect IR PGO profile: -;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ -;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ -;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ -;; memprof.cc -o pgo.exe -fprofile-generate=. -;; $ ./pgo.exe -;; $ mv default_*.profraw memprof_pgo.profraw -;; -;; # Generate below LLVM IR for use in matching: +;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh. 
+;; # To generate below LLVM IR for use in matching: ;; $ clang++ -gmlt -fdebug-info-for-profiling -fno-omit-frame-pointer \ ;; -fno-optimize-sibling-calls memprof.cc -S -emit-llvm diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ +#include +int main(int argc, char **argv) { + char *x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + return 0; +} +EOF + +read -r -d '' INLINE << EOF +#include +#include + +__attribute__((always_inline)) +void qux(int x) { + char *ptr = malloc(x); + memset(ptr, 0, x); + free(ptr); +} + +__attribute__((noinline)) +void foo(int x){ qux(x); } + +__attribute__((noinline)) +void bar(int x) { foo(x); } + +int main(int argc, char **argv) { + bar(argc); + return 0; +} +EOF + +read -r -d '' MULTI << EOF +#include +#include +#include +int main(int argc, char **argv) { + char *x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + __memprof_profile_dump(); + x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + return 0; +} +EOF + +DEFAULT_MEMPROF_FLAGS="-fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie" + +# Map each test to their source and any additional flags separated by ; +declare -A INPUTS +INPUTS["basic"]="BASIC" +INPUTS["inline"]="INLINE" +INPUTS["multi"]="MULTI" +INPUTS["pic"]="BASIC;-pie" + +for name in "${!INPUTS[@]}"; do + IFS=";" read -r src flags <<< "${INPUTS[$name]}" + echo "${!src}" > ${OUTDIR}/${name}.c + ${CLANG} ${DEFAULT_MEMPROF_FLAGS} ${flags} ${OUTDIR}/${name}.c -o ${OUTDIR}/${name}.memprofexe + env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/${name}.memprofexe > ${OUTDIR}/${name}.memprofraw + rm ${OUTDIR}/${name}.c +done diff 
--git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test --- a/llvm/test/tools/llvm-profdata/memprof-basic.test +++ b/llvm/test/tools/llvm-profdata/memprof-basic.test @@ -1,37 +1,6 @@ REQUIRES: x86_64-linux -The input raw profile test has been generated from the following source code: - -``` -#include <stdlib.h> -#include <string.h> -int main(int argc, char **argv) { - char *x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - return 0; -} -``` - -The following commands were used to compile the source to a memprof instrumented -executable and collect a raw binary format profile. Since the profile contains -virtual addresses for the callstack, we do not expect the raw binary profile to -be deterministic. The summary should be deterministic apart from changes to -the shared libraries linked in which could change the number of segments -recorded. - -``` -clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ - -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ - -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie \ - source.c -o basic.memprofexe - -env MEMPROF_OPTIONS=log_path=stdout ./basic.memprofexe > basic.memprofraw -``` - +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang RUN: llvm-profdata show --memory %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o - | FileCheck %s We expect 2 MIB entries, 1 each for the malloc calls in the program. 
Any diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test --- a/llvm/test/tools/llvm-profdata/memprof-inline.test +++ b/llvm/test/tools/llvm-profdata/memprof-inline.test @@ -1,41 +1,6 @@ REQUIRES: x86_64-linux -The input raw profile test has been generated from the following source code: - -``` -#include <stdlib.h> -#include <string.h> - -__attribute__((always_inline)) -void qux(int x) { - char *ptr = malloc(x); - memset(ptr, 0, x); - free(ptr); -} - -__attribute__((noinline)) -void foo(int x){ qux(x); } - -__attribute__((noinline)) -void bar(int x) { foo(x); } - -int main(int argc, char **argv) { - bar(argc); - return 0; -} -``` - -Compile and run with the following commands: - -``` -bin/clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ - -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ - -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie \ - inline.c -o inline.memprofexe - -env MEMPROF_OPTIONS=log_path=stdout ./inline.memprofexe > inline.memprofraw -``` - +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s CHECK: MemprofProfile: diff --git a/llvm/test/tools/llvm-profdata/memprof-merge.test b/llvm/test/tools/llvm-profdata/memprof-merge.test --- a/llvm/test/tools/llvm-profdata/memprof-merge.test +++ b/llvm/test/tools/llvm-profdata/memprof-merge.test @@ -1,40 +1,13 @@ REQUIRES: x86_64-linux -The input memprof and instrumented raw profiles were generated from the following source code: - -``` -#include <stdlib.h> -#include <string.h> -int main(int argc, char **argv) { - char *x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - return 0; -} -``` - -Steps to collect the memprof raw profile and the instrprof raw profile: - -``` -# Collect instrprof profile with name compression 
disabled since some buildbots -# do not have zlib. -clang -mllvm -enable-name-compression=false -fprofile-generate source.c -o instr.out -./instr.out -mv *.profraw basic.profraw - -# Collect memprof profile. -clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ - -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ - -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie \ - source.c -o basic.memprofexe - -env MEMPROF_OPTIONS=log_path=stdout ./basic.memprofexe > basic.memprofraw -``` - -RUN: llvm-profdata merge %p/Inputs/basic.profraw %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof +RUN: echo ":ir" > %t.proftext +RUN: echo "main" >> %t.proftext +RUN: echo "742261418966908927" >> %t.proftext +RUN: echo "1" >> %t.proftext +RUN: echo "1" >> %t.proftext + +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof RUN: llvm-profdata show %t.prof | FileCheck %s For now we only check the validity of the instrumented profile since we don't diff --git a/llvm/test/tools/llvm-profdata/memprof-multi.test b/llvm/test/tools/llvm-profdata/memprof-multi.test --- a/llvm/test/tools/llvm-profdata/memprof-multi.test +++ b/llvm/test/tools/llvm-profdata/memprof-multi.test @@ -1,39 +1,6 @@ REQUIRES: x86_64-linux -The input raw profile test has been generated from the following source code: - -``` -#include <sanitizer/memprof_interface.h> -#include <stdlib.h> -#include <string.h> -int main(int argc, char **argv) { - char *x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - __memprof_profile_dump(); - x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - return 0; -} -``` - -The following commands were used to compile the source to a memprof instrumented -executable and collect a raw binary format profile. 
Since the profile contains -virtual addresses for the callstack, we do not expect the raw binary profile to -be deterministic. The summary should be deterministic apart from changes to -the shared libraries linked in which could change the number of segments -recorded. - -``` -clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ - -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ - -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie \ - source.c -o multi.memprofexe - -env MEMPROF_OPTIONS=log_path=stdout ./multi.memprofexe > multi.memprofraw -``` - +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang RUN: llvm-profdata show --memory %p/Inputs/multi.memprofraw --profiled-binary %p/Inputs/multi.memprofexe -o - | FileCheck %s We expect 2 MIB entries, 1 each for the malloc calls in the program. diff --git a/llvm/test/tools/llvm-profdata/memprof-pic.test b/llvm/test/tools/llvm-profdata/memprof-pic.test --- a/llvm/test/tools/llvm-profdata/memprof-pic.test +++ b/llvm/test/tools/llvm-profdata/memprof-pic.test @@ -1,40 +1,12 @@ REQUIRES: x86_64-linux -This test ensures that llvm-profdata fails with a descriptive error message -when invoked on a memprof profiled binary which was built with position -independent code. - -The input raw profile test has been generated from the following source code: - -``` -#include <stdlib.h> -#include <string.h> -int main(int argc, char **argv) { - char *x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - x = (char *)malloc(10); - memset(x, 0, 10); - free(x); - return 0; -} -``` - -The following commands were used to compile the source to a memprof instrumented -executable and collect a raw binary format profile. Since the profile contains -virtual addresses for the callstack, we do not expect the raw binary profile to -be deterministic. 
The summary should be deterministic apart from changes to -the shared libraries linked in which could change the number of segments +Since the profile contains virtual addresses for the callstack, +we do not expect the raw binary profile to be deterministic. The +summary should be deterministic apart from changes to the shared +libraries linked in which could change the number of segments recorded. -``` -clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \ - -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \ - -fno-optimize-sibling-calls -m64 -Wl,-build-id -pie \ - source.c -o pic.memprofexe - -env MEMPROF_OPTIONS=log_path=stdout ./pic.memprofexe > pic.memprofraw -``` - +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang RUN: not llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/Inputs/pic.memprofexe -o - 2>&1 | FileCheck %s + CHECK: Unsupported position independent code