Index: bolt/lib/Rewrite/RewriteInstance.cpp =================================================================== --- bolt/lib/Rewrite/RewriteInstance.cpp +++ bolt/lib/Rewrite/RewriteInstance.cpp @@ -1839,8 +1839,9 @@ exit(1); } - if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && - !opts::HotText.getNumOccurrences()) { + if (opts::Instrument || + (opts::ReorderFunctions != ReorderFunctions::RT_NONE && + !opts::HotText.getNumOccurrences())) { opts::HotText = true; } else if (opts::HotText && !BC->HasRelocations) { errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; Index: bolt/runtime/common.h =================================================================== --- bolt/runtime/common.h +++ bolt/runtime/common.h @@ -165,6 +165,20 @@ // Anonymous namespace covering everything but our library entry point namespace { +// Get the difference between runtime address of .text section and +// static address in section header table. Can be subtracted from arbitrary +// pc value recorded at runtime to get the corresponding static address, which +// in turn can be used to search for indirect call description. Needed because +// indirect call descriptions are read-only non-relocatable data. +uint64_t getTextBaseAddress() { + uint64_t DynAddr; + uint64_t StaticAddr; + __asm__ volatile("leaq __hot_end(%%rip), %0\n\t" + "movabsq $__hot_end, %1\n\t" + : "=r"(DynAddr), "=r"(StaticAddr)); + return DynAddr - StaticAddr; +} + constexpr uint32_t BufSize = 10240; #define _STRINGIFY(x) #x Index: bolt/runtime/instr.cpp =================================================================== --- bolt/runtime/instr.cpp +++ bolt/runtime/instr.cpp @@ -215,6 +215,12 @@ /// __bolt_instr_setup, our initialization routine. BumpPtrAllocator *GlobalAlloc; +// Base address which we subtract from recorded PC values when searching for +// indirect call description entries. Needed because indCall descriptions are +// mapped read-only and contain static addresses.
Initialized in +// __bolt_instr_setup. +uint64_t TextBaseAddress = 0; + // Storage for GlobalAlloc which can be shared if not using // instrumentation-file-append-pid. void *GlobalMetadataStorage; @@ -1389,7 +1395,7 @@ const IndCallDescription *CallsiteDesc = &Ctx->IndCallDescriptions[CallsiteID]; const IndCallTargetDescription *TargetDesc = - Ctx->lookupIndCallTarget(Entry.Key); + Ctx->lookupIndCallTarget(Entry.Key - TextBaseAddress); if (!TargetDesc) { DEBUG(report("Failed to lookup indirect call target\n")); char LineBuf[BufSize]; @@ -1607,6 +1613,7 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call; __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall; + TextBaseAddress = getTextBaseAddress(); const uint64_t CountersStart = reinterpret_cast(&__bolt_instr_locations[0]); Index: bolt/test/runtime/instrumentation-indirect-2.c =================================================================== --- /dev/null +++ bolt/test/runtime/instrumentation-indirect-2.c @@ -0,0 +1,168 @@ +// Check that indirect call hash tables properly register multiple calls, +// and that calls from different processes don't get mixed up when using +// --instrumentation-file-append-pid. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +__attribute__((noinline)) void funcA(int pid) { printf("funcA %d\n", pid); } +__attribute__((noinline)) void funcB(int pid) { printf("funcB %d\n", pid); } +__attribute__((noinline)) void funcC(int pid) { printf("funcC %d\n", pid); } +__attribute__((noinline)) void funcD(int pid) { printf("funcD %d\n", pid); } +__attribute__((noinline)) void funcE(int pid) { printf("funcE %d\n", pid); } +__attribute__((noinline)) void funcF(int pid) { printf("funcF %d\n", pid); } +__attribute__((noinline)) void funcG(int pid) { printf("funcG %d\n", pid); } +__attribute__((noinline)) void funcH(int pid) { printf("funcH %d\n", pid); } +__attribute__((noinline)) void funcI(int pid) { printf("funcI %d\n", pid); } +__attribute__((noinline)) void funcJ(int pid) { printf("funcJ %d\n", pid); } +__attribute__((noinline)) void funcK(int pid) { printf("funcK %d\n", pid); } +__attribute__((noinline)) void funcL(int pid) { printf("funcL %d\n", pid); } +__attribute__((noinline)) void funcM(int pid) { printf("funcM %d\n", pid); } +__attribute__((noinline)) void funcN(int pid) { printf("funcN %d\n", pid); } +__attribute__((noinline)) void funcO(int pid) { printf("funcO %d\n", pid); } +__attribute__((noinline)) void funcP(int pid) { printf("funcP %d\n", pid); } + +int main() { + + void (*funcs[])(int) = {funcA, funcB, funcC, funcD, funcE, funcF, + funcG, funcH, funcI, funcJ, funcK, funcL, + funcM, funcN, funcO, funcP}; + int i; + + switch (fork()) { + case -1: + printf("Failed to fork!\n"); + exit(-1); + break; + case 0: + i = 0; // child: calls the even-indexed functions + break; + default: + i = 1; // parent: calls the odd-indexed functions + break; + } + int pid = getpid(); + for (; i < sizeof(funcs) / sizeof(funcs[0]); i += 2) { + funcs[i](pid); + } + + return 0; +} +/* +REQUIRES: system-linux, shell + +RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fno-pie + +RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \ +RUN: --conservative-instrumentation -o %t.instrumented_conservative \ +RUN: --instrumentation-sleep-time=1
--instrumentation-no-counters-clear \ +RUN: --instrumentation-wait-forks + +# Instrumented program needs to finish returning zero +# Both output and profile must contain all 16 functions +RUN: %t.instrumented_conservative > %t.output +# Wait for profile and output to be fully written +RUN: bash %S/wait_file.sh %t.output +RUN: bash %S/wait_file.sh %t.fdata +RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT +RUN: cat %t.fdata | FileCheck %s --check-prefix=CHECK-COMMON-PROF + +CHECK-OUTPUT-DAG: funcA +CHECK-OUTPUT-DAG: funcB +CHECK-OUTPUT-DAG: funcC +CHECK-OUTPUT-DAG: funcD +CHECK-OUTPUT-DAG: funcE +CHECK-OUTPUT-DAG: funcF +CHECK-OUTPUT-DAG: funcG +CHECK-OUTPUT-DAG: funcH +CHECK-OUTPUT-DAG: funcI +CHECK-OUTPUT-DAG: funcJ +CHECK-OUTPUT-DAG: funcK +CHECK-OUTPUT-DAG: funcL +CHECK-OUTPUT-DAG: funcM +CHECK-OUTPUT-DAG: funcN +CHECK-OUTPUT-DAG: funcO +CHECK-OUTPUT-DAG: funcP + +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcL 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcN 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1 + +RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t \ +RUN: --instrumentation-file-append-pid \ +RUN: -o %t.instrumented + +RUN: %t.instrumented > 
%t.output +# Wait till output is fully written in case child outlives parent +RUN: bash %S/wait_file.sh %t.output +# Make sure all functions were called +RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT + +RUN: child_pid=$(cat %t.output | grep funcA | awk '{print $2;}') +RUN: par_pid=$(cat %t.output | grep funcB | awk '{print $2;}') +# Wait for each per-PID profile to be fully written before renaming it +RUN: bash %S/wait_file.sh %t.$child_pid.fdata +RUN: bash %S/wait_file.sh %t.$par_pid.fdata + +RUN: mv %t.$child_pid.fdata %t.child.fdata +RUN: mv %t.$par_pid.fdata %t.parent.fdata + +# Instrumented binary must produce two profiles with only local calls +# recorded. Functions called only in child should not appear in parent's +# process and vice versa. +RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-CHILD +RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-NOCHILD +RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-PARENT +RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-NOPARENT + +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1 + +CHECK-NOCHILD-NOT: funcB +CHECK-NOCHILD-NOT: funcD +CHECK-NOCHILD-NOT: funcF +CHECK-NOCHILD-NOT: funcH +CHECK-NOCHILD-NOT: funcJ +CHECK-NOCHILD-NOT: funcL +CHECK-NOCHILD-NOT: funcN +CHECK-NOCHILD-NOT: funcP + +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcL 0 0 1 +CHECK-PARENT-DAG: 1 main
{{[0-9a-f]+}} 1 funcN 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1 + +CHECK-NOPARENT-NOT: funcA +CHECK-NOPARENT-NOT: funcC +CHECK-NOPARENT-NOT: funcE +CHECK-NOPARENT-NOT: funcG +CHECK-NOPARENT-NOT: funcI +CHECK-NOPARENT-NOT: funcK +CHECK-NOPARENT-NOT: funcM +CHECK-NOPARENT-NOT: funcO + + */ Index: bolt/test/runtime/wait_file.sh =================================================================== --- /dev/null +++ bolt/test/runtime/wait_file.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Return 0 if the file exists and fuser reports no process using it, else 1. +check_file() { + local file="$1" + if [ -z "$file" ]; then + echo "No file passed!" + exit 1 + fi + if [ ! -f "$file" ]; then + return 1 + fi + + fuser "$file" &> /dev/null + if [ $? -eq 0 ]; then + return 1 + fi + return 0 +} + +# Poll check_file once per second, retrying at most 10 times. +wait_file() { + local file="$1" + local max_sleep=10 + check_file "$file" + local ret=$? + while [ $ret -ne 0 ] && [ $max_sleep -ne 0 ]; do + sleep 1 + max_sleep=$((max_sleep - 1)) + check_file "$file" + ret=$? + done + # Fail on the last check result; success on the final retry still counts. + if [ $ret -ne 0 ]; then + echo "The file does not exist or the test hung!" + exit 1 + fi + +} +file="$1" +wait_file "$file"