diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -869,67 +869,99 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if ((CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF) AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-    DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-    DEPENDS clang llvm-bolt
-    COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-      -instrument --instrumentation-file-append-pid
-      --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-    COMMENT "Instrumenting clang binary with BOLT"
-    VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+    set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+    set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-    DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-    DEPENDS clang-instrumented
-    COMMAND ${CMAKE_COMMAND} -E create_symlink
-      ${CLANG_INSTRUMENTED}
-      ${CLANGXX_INSTRUMENTED}
-    COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-    VERBATIM
-  )
+    # Instrument clang with BOLT
+    add_custom_target(clang-instrumented
+      DEPENDS ${CLANG_INSTRUMENTED}
+    )
+    add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+      DEPENDS clang llvm-bolt
+      COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+        -instrument --instrumentation-file-append-pid
+        --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      COMMENT "Instrumenting clang binary with BOLT"
+      VERBATIM
+    )
+
+    # Make a symlink from clang-bolt.inst to clang++-bolt.inst
+    add_custom_target(clang++-instrumented
+      DEPENDS ${CLANGXX_INSTRUMENTED}
+    )
+    add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
+      DEPENDS clang-instrumented
+      COMMAND ${CMAKE_COMMAND} -E create_symlink
+        ${CLANG_INSTRUMENTED}
+        ${CLANGXX_INSTRUMENTED}
+      COMMENT "Creating symlink from BOLT instrumented clang to clang++"
+      VERBATIM
+    )
+  endif()
 
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
+  # Select the compiler used for the profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+    set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+    set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+    set(CLANG_BOLT_CC ${CLANG_PATH})
+    set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+    # Perf sampling (the profile is written to perf.data in this binary dir):
+    # - use maximum frequency to reduce training time
+    # - use cycle events instead of branches - empirically found to produce better results
+    # - if available, enable taken branch stack/LBR sampling (-j/--branch-filter)
+    set(PERF_CMDLINE perf record --event=cycles:u --output=${CMAKE_CURRENT_BINARY_DIR}/perf.data --freq=max)
+    if (CLANG_BOLT_PERF_LBR)
+      list(APPEND PERF_CMDLINE --branch-filter=any,u)
+    endif()
+    list(APPEND PERF_CMDLINE --)
+
+    list(APPEND CLANG_BOLT_EXTRA_CMAKE_FLAGS
+      -DCMAKE_C_COMPILER_LAUNCHER="${PERF_CMDLINE}"
+      -DCMAKE_CXX_COMPILER_LAUNCHER="${PERF_CMDLINE}"
+    )
+  endif()
+
+  # Build specified targets to collect the profile
+  if (CLANG_BOLT_INSTRUMENT)
+    set(BOLT_PROFILE_DEPS clang++-instrumented)
+  else()
+    set(BOLT_PROFILE_DEPS clang)
+  endif()
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-bins/)
   set(build_configuration "$<CONFIG>")
   include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
-    DEPENDS clang++-instrumented
-    PREFIX bolt-instrumentation-profile
+  ExternalProject_Add(bolt-profile
+    DEPENDS ${BOLT_PROFILE_DEPS}
+    PREFIX bolt-profile
     SOURCE_DIR ${CMAKE_SOURCE_DIR}
     STAMP_DIR ${STAMP_DIR}
     BINARY_DIR ${BINARY_DIR}
     EXCLUDE_FROM_ALL 1
     CMAKE_ARGS
-                ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
+                ${CLANG_BOLT_EXTRA_CMAKE_FLAGS}
                 # We shouldn't need to set this here, but INSTALL_DIR doesn't
                 # seem to work, so instead I'm passing this through
                 -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-                -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
-                -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
-                -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+                -DCMAKE_C_COMPILER=${CLANG_BOLT_CC}
+                -DCMAKE_CXX_COMPILER=${CLANG_BOLT_CXX}
+                -DCMAKE_ASM_COMPILER=${CLANG_BOLT_CC}
                 -DCMAKE_ASM_COMPILER_ID=Clang
                 -DCMAKE_BUILD_TYPE=Release
-                -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+                -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_PROJECTS}
                 -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
     BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
                                    --config ${build_configuration}
-                                   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+                                   --target ${CLANG_BOLT_TARGETS}
     INSTALL_COMMAND ""
     STEP_TARGETS configure build
     USES_TERMINAL_CONFIGURE 1
@@ -937,20 +969,30 @@
     USES_TERMINAL_INSTALL 1
   )
 
-  # Merge profiles into one using merge-fdata
   add_custom_target(clang-bolt-profile
     DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
   )
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-    DEPENDS merge-fdata bolt-instrumentation-profile-build
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    COMMAND ${Python3_EXECUTABLE}
-      ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
-      $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-      ${CMAKE_CURRENT_BINARY_DIR}
-    COMMENT "Preparing BOLT profile"
-    VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+    # Merge profiles into one using merge-fdata
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      DEPENDS merge-fdata bolt-profile-build
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      COMMAND ${Python3_EXECUTABLE}
+        ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
+        $<TARGET_FILE:merge-fdata> prof.fdata .
+      COMMENT "Preparing BOLT profile"
+      VERBATIM
+    )
+  else() # CLANG_BOLT_PERF
+    # Convert perf.data (written by the perf record launcher) to fdata using perf2bolt
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      DEPENDS perf2bolt bolt-profile-build
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      COMMAND perf2bolt ${CLANG_PATH} -p perf.data -o prof.fdata
+      COMMENT "Converting perf profile to BOLT fdata"
+      VERBATIM
+    )
+  endif()
 
   # Optimize original (pre-bolt) Clang using the collected profile
   add_custom_target(clang-bolt
diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake
--- a/clang/cmake/caches/BOLT.cmake
+++ b/clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,23 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+endif()
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 
 # Disable function splitting enabled by default in GCC8+
 if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition"
+      CACHE STRING "" FORCE)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition"
+      CACHE STRING "" FORCE)
 endif()