diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(clang-reorder-fields) add_subdirectory(modularize) add_subdirectory(clang-tidy) +add_subdirectory(clang-misexpect) add_subdirectory(clang-change-namespace) add_subdirectory(clang-doc) diff --git a/clang-tools-extra/clang-misexpect/CMakeLists.txt b/clang-tools-extra/clang-misexpect/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_library(clangMisExpect + ClangMisExpect.cpp + + LINK_LIBS + clangBasic + clangCodeGen + clangFrontend + clangFrontendTool + clangTooling + clangToolingCore + ) + +add_subdirectory(tool) diff --git a/clang-tools-extra/clang-misexpect/ClangMisExpect.h b/clang-tools-extra/clang-misexpect/ClangMisExpect.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/ClangMisExpect.h @@ -0,0 +1,55 @@ +//===-- ClangMisExpect.h - ClangMisexpect -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a method to create the FrontendActionFactory for the +// clang-misexpect tool. The factory consumes a compilation database and valid +// profiling data to run the compiler over a codebase and issue warnings +// generated from the -Wmisexpect compiler flags. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Frontend/FrontendOptions.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Rewrite/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang { +namespace misexpect { + +enum ProfileKind { + Clang, + IR, + CSIR, + Sample, +}; + +class MisExpectFactory : public tooling::FrontendActionFactory { + using Path = std::string; + +public: + MisExpectFactory(Path Profile, ProfileKind ProfileType); + + bool runInvocation(std::shared_ptr Invocation, + FileManager *Files, + std::shared_ptr PCHContainerOps, + DiagnosticConsumer *DiagConsumer) override; + + std::unique_ptr create() override; + +private: + Path ProfilePath; + ProfileKind ProfileType; +}; + +} // namespace misexpect +} // namespace clang diff --git a/clang-tools-extra/clang-misexpect/ClangMisExpect.cpp b/clang-tools-extra/clang-misexpect/ClangMisExpect.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/ClangMisExpect.cpp @@ -0,0 +1,88 @@ +//===-- ClangMisExpect.cpp - ClangMisexpect ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a method to create the FrontendActionFactory for the +// clang-misexpect tool. The factory consumes a compilation database and valid +// profiling data to run the compiler over a codebase and issue warnings +// generated from the -Wmisexpect compiler flags. 
+// +//===----------------------------------------------------------------------===// + +#include "ClangMisExpect.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/CodeGen/CodeGenAction.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::tooling; +using namespace misexpect; + +#define DEBUG_TYPE "misexpect" + +MisExpectFactory::MisExpectFactory(Path ProfilePath, ProfileKind ProfileType) + : ProfilePath(ProfilePath), ProfileType(ProfileType) {} + +std::unique_ptr MisExpectFactory::create() { + return std::make_unique(); +} + +bool MisExpectFactory::runInvocation( + std::shared_ptr Invocation, FileManager *Files, + std::shared_ptr PCHContainerOps, + DiagnosticConsumer *DiagConsumer) { + // Only run the compiler through IR generation + Invocation->getFrontendOpts().ProgramAction = frontend::EmitLLVMOnly; + + // clear the existing profile flags and metadata + Invocation->getCodeGenOpts().setProfileUse(CodeGenOptions::ProfileNone); + Invocation->getCodeGenOpts().setProfileInstr(CodeGenOptions::ProfileNone); + Invocation->getCodeGenOpts().ProfileInstrumentUsePath = ""; + Invocation->getCodeGenOpts().SampleProfileFile = ""; + // Optimizaiton level must be at least 1 for misexpect warnings and PGO + Invocation->getCodeGenOpts().OptimizationLevel = 1; + + // duplicate the logic in ExecuteCompilerInvocation to process llvm options + if (!Invocation->getFrontendOpts().LLVMArgs.empty()) { + unsigned NumArgs = Invocation->getFrontendOpts().LLVMArgs.size(); + auto Args = std::make_unique(NumArgs + 2); + Args[0] = "clang (LLVM option parsing)"; + for (unsigned i = 0; i != NumArgs; ++i) + Args[i + 1] = Invocation->getFrontendOpts().LLVMArgs[i].c_str(); + Args[NumArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); + } + + // set new profiling options based on profile type + 
switch (ProfileType) { + case ProfileKind::Clang: + Invocation->getCodeGenOpts().setProfileUse( + CodeGenOptions::ProfileClangInstr); + break; + case ProfileKind::IR: + Invocation->getCodeGenOpts().setProfileUse(CodeGenOptions::ProfileIRInstr); + break; + case ProfileKind::CSIR: + Invocation->getCodeGenOpts().setProfileUse( + CodeGenOptions::ProfileCSIRInstr); + break; + case ProfileKind::Sample: + Invocation->getCodeGenOpts().SampleProfileFile = ProfilePath; + break; + }; + + if (ProfileType != ProfileKind::Sample) + Invocation->getCodeGenOpts().ProfileInstrumentUsePath = ProfilePath; + + return FrontendActionFactory::runInvocation(Invocation, Files, + PCHContainerOps, DiagConsumer); +} + +#undef DEBUG_TYPE diff --git a/clang-tools-extra/clang-misexpect/tool/CMakeLists.txt b/clang-tools-extra/clang-misexpect/tool/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/tool/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsDescs + AllTargetsInfos + support + ) + +add_clang_tool(clang-misexpect + ClangMisExpectMain.cpp + ) +add_dependencies(clang-misexpect + clang-resource-headers + ) +target_link_libraries(clang-misexpect + PRIVATE + clangBasic + clangMisExpect + clangFrontend + clangCodeGen + clangTooling + clangToolingCore + clangToolingSyntax + ) + diff --git a/clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp b/clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp @@ -0,0 +1,231 @@ +//===-- ClangMisExpectMain.cpp - ClangMisexpect -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the main function for clang misexpect. It uses a +// libTooling exectutor to check each file in the compiler_commands.json against +// a provided PGO profile. When profile counters disagree with the compiler's +// threshold values for likely and unlike branches clang-misexpect will issue a +// diagnostic message. +// +//===----------------------------------------------------------------------===// + +#include "../ClangMisExpect.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/LLVM.h" +#include "clang/Tooling/AllTUsExecution.h" +#include "clang/Tooling/ArgumentsAdjusters.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/Optional.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Signals.h" +#include + +using namespace clang; +using namespace clang::tooling; +using namespace clang::misexpect; +using namespace llvm; +using Path = std::string; + +enum VerifyType { + VerifyOnly, + Full, + None, +}; + +static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); +static llvm::cl::OptionCategory + ClangMisExpectCategory("clang-misexpect options"); + +static llvm::cl::opt ProfileDir( + "profile-dir", + llvm::cl::desc( + "Specify a path to the profile data to use during validation"), + llvm::cl::cat(ClangMisExpectCategory)); + +static llvm::cl::opt ProfFormat( + "profile-format", + llvm::cl::desc( + "Specify the format of the profile data used during validation"), + llvm::cl::init(ProfileKind::IR), + llvm::cl::values(clEnumValN(Clang, "clang", "Clang Instrumentation"), + clEnumValN(IR, "llvm", "IR 
Instrumentation"), + clEnumValN(CSIR, "csllvm", + "Context sensitive IR Instrumentation"), + clEnumValN(Sample, "sample", "Sampling Instrumentation")), + llvm::cl::cat(ClangMisExpectCategory)); + +static llvm::cl::opt Verification( + "verify", llvm::cl::desc("Specify the type of profile format verification"), + llvm::cl::init(VerifyType::Full), + llvm::cl::values( + clEnumValN(VerifyType::VerifyOnly, "only", + "Only checks that the profile format is " + "compatable with the selected options"), + clEnumValN(VerifyType::Full, "full", + "Performs full verification before running clang-misexpect " + "checks over compile_commands.json"), + clEnumValN(VerifyType::None, "none", + "Skips profile format verification. Useful when running on " + "individual files")), + llvm::cl::cat(ClangMisExpectCategory)); + +namespace { + +std::string profileKindToString(ProfileKind k) { + switch (k) { + case ProfileKind::Clang: + return "Frontend based profile from option --profile-format=clang"; + case ProfileKind::IR: + return "IR based profile from option --profile-format=llvm"; + case ProfileKind::CSIR: + return "Context-Sensitive IR based profile from option " + "--profile-format=csllvm"; + case ProfileKind::Sample: + return "Sampling based profile from option --profile-format=sample"; + } +} + +void reportProfileFormatError(ProfileKind k) { + auto &OS = llvm::errs(); + OS.changeColor(raw_ostream::Colors::RED, true); + OS << "Error: "; + OS.resetColor(); + OS << "Invalid profile format. 
Expected " << profileKindToString(ProfFormat) + << "\n"; + exit(1); +} + +// verifies that the give profile and given option match +void verifyProfileFormat() { + if (Verification == VerifyType::None) + return; + + auto &OS = llvm::errs(); + if (ProfFormat == ProfileKind::Sample) { + llvm::LLVMContext C; + auto ProfReader = + llvm::sampleprof::SampleProfileReader::create(ProfileDir, C); + if (ProfReader.getError() == llvm::sampleprof_error::unrecognized_format) + reportProfileFormatError(ProfFormat); + } else { + auto ProfReader = InstrProfReader::create(ProfileDir); + if (!ProfReader) + reportProfileFormatError(ProfFormat); + + auto Err = ProfReader.get()->readHeader(); + if (Err) { + OS.changeColor(raw_ostream::Colors::RED, true); + OS << "Error: "; + OS.resetColor(); + OS << llvm::toString(std::move(Err)) << "\n"; + reportProfileFormatError(ProfFormat); + } + + switch (ProfFormat) { + case ProfileKind::CSIR: { + if (!ProfReader.get()->hasCSIRLevelProfile()) + reportProfileFormatError(ProfFormat); + break; + } + case ProfileKind::IR: { + if (!ProfReader.get()->isIRLevelProfile() && + !ProfReader.get()->hasCSIRLevelProfile()) + reportProfileFormatError(ProfFormat); + break; + } + case ProfileKind::Clang: { + if (ProfReader.get()->isIRLevelProfile()) { + reportProfileFormatError(ProfFormat); + } + break; + } + case ProfileKind::Sample: + llvm_unreachable("Found Sample profile when processing Instr Profiles"); + break; + }; + } +} + +} // namespace + +int main(int argc, const char **argv) { + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + + // TODO: Allow more concurrency when the LLVM backend is threadsafe when used + // with libTooling Executors + // + // clang-misexpect performs checks using the LLVM backend that are accessed + // through a CodeGenAction. 
TSAN revealed that when using an executor some + // data races exist when initializing the backend for each compiler invocation + // Once these races have been addressed, we can stop limiting concurrency + ExecutorConcurrency.setInitialValue(1); + ExecutorName.setInitialValue("all-TUs"); + + CommonOptionsParser OptionsParser(argc, argv, ClangMisExpectCategory, + llvm::cl::ZeroOrMore); + + verifyProfileFormat(); + if (Verification == VerifyType::VerifyOnly) + return 0; + + auto &OS = llvm::errs(); + auto Executor = + createExecutorFromCommandLineArgs(argc, argv, ClangMisExpectCategory); + + if (!Executor) { + OS << "Failed to create executor --- " + << llvm::toString(Executor.takeError()) << "\n"; + return 1; + } + + auto ArgAdjuster = getStripPluginsAdjuster(); + auto StripProfileWarnings = [](const CommandLineArguments &Args, + StringRef /*unused*/ Unused) { + CommandLineArguments AdjustedArgs; + std::set FilteredArgs = {"-Wprofile-instr-unprofiled", + "-fcoverage-mapping", "-Werror"}; + for (size_t I = 0, E = Args.size(); I != E; I++) { + if (FilteredArgs.find(Args[I]) != FilteredArgs.end()) + continue; + AdjustedArgs.push_back(Args[I]); + } + return AdjustedArgs; + }; + + ArgAdjuster = combineAdjusters(StripProfileWarnings, ArgAdjuster); + + ArgAdjuster = combineAdjusters( + getInsertArgumentAdjuster({"-Wmisexpect", "-Wno-profile-instr-unprofiled", + "-Wno-profile-instr-out-of-date"}, + tooling::ArgumentInsertPosition::END), + ArgAdjuster); + + auto Err = Executor->get()->execute( + std::make_unique(ProfileDir, ProfFormat), + ArgAdjuster); + + if (Err) { + OS.changeColor(raw_ostream::Colors::RED, true); + OS << "Error: "; + OS.resetColor(); + OS << llvm::toString(std::move(Err)) << "\n"; + } + + // Emit collected data. 
+ Executor->get()->getToolResults()->forEachResult( + [&OS](llvm::StringRef Key, llvm::StringRef Value) { + OS << "----" << Key.str() << "\n" << Value.str() << "\n"; + }); + return 0; +} diff --git a/clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py b/clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py new file mode 100755 --- /dev/null +++ b/clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# +#===- run-clang-misexpect.py - Parallel clang-misexpect ------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# +# FIXME: Integrate with clang-misexpect-diff.py + +""" +Parallel clang-misexpect runner +========================== + +Runs clang-misexpect over all files in a compilation database. Requires clang-misexpect +$PATH. + +Example invocations. +- Run clang-misexpect on all files in the compiler database. + run-clang-misexpect.py $PWD -profile-path somefile.profdata + +- Run clang-misexpect on all files in the compiler database, using a specific clang-misexpect binary. 
+ run-clang-misexpect.py $PWD -profile-path somefile.profdata -profile-format=llvm -clang-misexpect-binary=/path/to/clang-misexpect + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import json +import multiprocessing +import os +import re +import subprocess +import sys +import threading + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = './' + while not os.path.isfile(os.path.join(result, path)): + if os.path.realpath(result) == '/': + print('Error: could not find compilation database.') + sys.exit(1) + result += '../' + return os.path.realpath(result) + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_misexpect_invocation(f, clang_misexpect_binary, build_path, + profile_path, profile_type, extra_arg, + extra_arg_before, quiet): + """Gets a command line for clang-misexpect.""" + start = [clang_misexpect_binary] + if profile_path is not None: + start.append('-profile-dir=' + profile_path) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + start.append('-profile-format=' + profile_type) + # make sure we use a standalone executor + start.append('-executor=standalone') + # Don't repeat profile verification + start.append('-verify=none') + if quiet: + start.append('-quiet') + start.append(f) + return start + + +def run_misexpect(args, build_path, profile_path, profile_type, queue, lock, + failed_files): + """Takes filenames out of queue and runs clang-misexpect on them.""" + while True: + name = queue.get() + invocation = get_misexpect_invocation(name, args.clang_misexpect_binary, + 
build_path, profile_path, profile_type, + args.extra_arg, args.extra_arg_before, + args.quiet) + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-misexpect over all files ' + 'in a compilation database. Requires ' + 'clang-misexpect and clang-apply-replacements in ' + '$PATH.') + parser.add_argument('-clang-misexpect-binary', metavar='PATH', + default='clang-misexpect', + help='path to clang-misexpect binary') + parser.add_argument('-j', type=int, default=0, + help='number of misexpect instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-profile-path', dest='profile_path', + help='Path used to read a PGO profile.') + parser.add_argument('-profile-format', dest='profile_type', default="llvm", + help='PGO profile format.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-misexpect in quiet mode') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + if 
args.profile_path is not None: + profile_path = args.profile_path + else: + profile_path=find_compilation_database("default.profdata") + + profile_type = args.profile_type + + try: + # Make sure that the profile is the correct format & compdb exists + invocation = [args.clang_misexpect_binary, '-verify=only'] + invocation.append('-p=' + build_path) + invocation.append('-profile-dir=' + profile_path) + invocation.append('-profile-format=' + profile_type) + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. + with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-misexpect.", file=sys.stderr) + sys.exit(1) + + + # Load the database and extract all files. + database = json.load(open(os.path.join(build_path, db_path))) + files = [make_absolute(entry['file'], entry['directory']) + for entry in database] + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of misexpect-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_misexpect, + args=(args, build_path, profile_path, profile_type, + task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. + for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. 
+ print('\nCtrl-C detected, goodbye.') + os.kill(0, 9) + + sys.exit(return_code) + +if __name__ == '__main__': + main() diff --git a/clang-tools-extra/docs/clang-misexpect.rst b/clang-tools-extra/docs/clang-misexpect.rst new file mode 100644 --- /dev/null +++ b/clang-tools-extra/docs/clang-misexpect.rst @@ -0,0 +1,215 @@ +=================== +Clang-Misexpect +=================== + +.. contents:: + +.. toctree:: + :maxdepth: 1 + +A standalone tool for verifying the accuracy of ``__builtin_expect()`` annotations +--------------------------------------------------------------------------------- + +:program:`clang-misexpect` is a standalone tool built on top of +:program:`clang`'s `LibTooling` infrastructure. + + +The tool is in a very early development stage, so you might encounter bugs and +crashes. Submitting reports with information about how to reproduce the issue +to `the LLVM bugtracker <https://llvm.org/bugs>`_ will definitely help the +project. If you have any ideas or suggestions, please file a feature request +there. + +At its core, it is a simple wrapper around the compiler that enables a +user to perform misexpect verification across an entire project through +use of a ``compile_commands.json``. If you have a suitable LLVM PGO profile, +and a compile commands database, then you can use the clang-misexpect +tool to find places in your codebase that may have problematic uses of +the ``__builtin_expect()`` annotations. + +:program:`clang-misexpect` automatically curates compiler flags found in the +compilation database when running the standalone tool. This allows us to +avoid issues for incompatible options, or when the compilation database +contains flags for incompatible types of profiling. Prior to each +compiler invocation we remove any conflicting flags and set the +appropriate options to generate misexpect diagnostics. We also disable +code generation, so only the minimal amount of the LLVM backend is used +to issue our diagnostics. 
+ +Running clang-misexpect +----------------------- + +Running over a compilation database +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default clang-misexpect will run over an entire compilation database. +This can be achieved in one of two ways. + +The first, and most straightforward method is to run the tool directly, +which will run a portion of the compiler over each command in the +compilation database, and issue any warnings when the +``__builtin_expect()`` annotations mismatch with the recorded profiling +counters. + + +.. code-block:: console + + $ clang-misexpect -profile-dir=/path/to/profile.profdata -p=/path/to/compdb -profile-format=clang source.cpp --executor=standalone + +clang-misexpect does its checks in the LLVM backend, which is a unique +case for `LibTooling` based projects. The backend is not 100% safe to use +in a multithreaded manner yet, so we lock the concurrency of +clang-misexpect to run single threaded. + +To avoid this penalty, we provide a python wrapper that can use the +clang-misexpect tool directly on each entry in the compilation database. + +This allows us to make use of parallel processing and speed up the +checks for large projects. The downside is that the diagnostics are no +longer formatted as nicely as when using the tool directly. + +Once all concurrency issues are addressed, we plan to remove the +concurrency limitations, though the concurrency level will still be +configurable from the command line. + +:: + + run-clang-misexpect.py $PWD -profile-path=$PWD/profile.profdata -clang-misexpect-binary=/path/to/clang-misexpect -profile-format=clang + +Running on a single file +~~~~~~~~~~~~~~~~~~~~~~~~ + +It is also possible to run :program:`clang-misexpect` on a single file. 
This is +achievable by changing the `LibTooling` executor to a standalone executor + +:: + + $ clang-misexpect -profile-dir=/path/to/profile.profdata -p=/path/to/compdb -profile-format=clang source.cpp --executor=standalone + +Usage +----- + +:program:`clang-misexpect` is designed to simplify using :program:`clang`'s +``-Wmisexpect`` compiler flag to verify your project's use of +``__builtin_expect()`` annotations. + +.. code-block:: console + + $ clang-misexpect [options] [... ] + +Options +------- + +:program:`clang-misexpect` offers the following options: + +.. code-block:: console + + USAGE: clang-misexpect [options] [... ] + + OPTIONS: + + Generic Options: + + --help - Display available options (--help-hidden for more) + --help-list - Display list of available options (--help-list-hidden for more) + --version - Display the version of this program + + clang-misexpect options: + + --extra-arg= - Additional argument to append to the compiler command line + --extra-arg-before= - Additional argument to prepend to the compiler command line + -p= - Build path + --profile-dir= - Specify a path to the profile data to use during validation + --profile-format= - Specify the format of the profile data used during validation + =clang - Clang Instrumentation + =llvm - IR Instrumentation + =csllvm - Context sensitive IR Instrumentation + =sample - Sampling Instrumentation + --verify= - Specify the type of profile format verification + =only - Only checks that the profile format is compatable with the selected options + =full - Performs full verification before running clang-misexpect checks over compile_commands.json + =none - Skips profile format verification. Useful when running on individual files + + -p is used to read a compile command database. + For example, it can be a CMake build directory in which a file named + compile_commands.json exists (use -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + CMake option to get this output). 
When no build path is specified, + a search for compile_commands.json will be attempted through all + parent paths of the first input file . See: + https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html for an + example of setting up Clang Tooling on a source tree. + + ... specify the paths of source files. These paths are + looked up in the compile command database. If the path of a file is + absolute, it needs to point into CMake's source tree. If the path is + relative, the current working directory needs to be in the CMake + source tree and the file must be in a subdirectory of the current + working directory. "./" prefixes in the relative files will be + automatically removed, but the rest of a relative path must be a + suffix of a path in the compile command database. + + +Important Options +----------------- + +-p +~~ + +Tells clang-misexpect where to find the compilation database + +--profile-dir +^^^^^^^^^^^^^ + +Provides a path to the PGO profile + +--profile-format +^^^^^^^^^^^^^^^^ + +LLVM supports 4 types of profile formats: Frontend, IR, CS-IR, and +Sampling. + ++----------------+--------------------------------------------------------------------------------------+ +| Profile Type | Description | ++================+======================================================================================+ +| Frontend | Profiling instrumentation added during compilation by the frontend, i.e. 
``clang`` | ++----------------+--------------------------------------------------------------------------------------+ +| IR | Profiling instrumentation added by the LLVM backend | ++----------------+--------------------------------------------------------------------------------------+ +| CS-IR | Context Sensitive IR based profiles | ++----------------+--------------------------------------------------------------------------------------+ +| Sampling | Profiles collected through sampling with external tools, such as ``perf`` on Linux | ++----------------+--------------------------------------------------------------------------------------+ + +:program:`clang-misexpect` is compatible with all Profiling formats, but the +profiling type must be given to the tool, so that the correct +compilation options can be set. + +Background on MisExpect Diagnostics +----------------------------------- + +MisExpect checks in the LLVM backend follow a simple procedure: if the +profiling counter associated with an ``llvm.expect`` instruction was too +low along the expected path, then emit a diagnostic message to the +user. + +The most natural place to perform the verification is just before +branch weights are assigned to the target instruction in the form of +branch weight metadata. + +There are 3 key places in the LLVM backend where branch weights are +created and assigned based on profiling information or the use of the +``llvm.expect`` intrinsic, and our implementation focuses on these +places to perform the verification. + +We calculate the threshold for emitting misexpect related diagnostics +based on the values the compiler assigns to ``llvm.expect`` intrinsics, +which can be set through the ``-likely-branch-weight`` and +``-unlikely-branch-weight`` LLVM options. During verification, if the +profile count is less than the calculated threshold, then we will emit a +remark or warning detailing a potential performance regression. 
The +diagnostic also reports the percentage of the time the annotation was +correct during profiling to help developers reason about how to proceed. + +The diagnostics are also available in the form of optimization remarks, +which can be serialized and processed through the ``opt-viewer.py`` +scripts in LLVM. diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt --- a/clang-tools-extra/test/CMakeLists.txt +++ b/clang-tools-extra/test/CMakeLists.txt @@ -47,6 +47,7 @@ clang-apply-replacements clang-change-namespace clang-doc + clang-misexpect clang-include-fixer clang-move clang-query diff --git a/clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext b/clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext new file mode 100644 --- /dev/null +++ b/clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext @@ -0,0 +1,8 @@ +bar +# Func Hash: +45795613684824 +# Num Counters: +2 +# Counter Values: +200000 +0 diff --git a/clang-tools-extra/test/clang-misexpect/Inputs/clean.c b/clang-tools-extra/test/clang-misexpect/Inputs/clean.c new file mode 100644 --- /dev/null +++ b/clang-tools-extra/test/clang-misexpect/Inputs/clean.c @@ -0,0 +1,19 @@ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (unlikely(rando % (outer_loop * inner_loop) == 0)) { + x = baz(rando); + } else { + x = foo(50); + } + return x; +} diff --git a/clang-tools-extra/test/clang-misexpect/basic.cpp b/clang-tools-extra/test/clang-misexpect/basic.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/test/clang-misexpect/basic.cpp @@ -0,0 +1,41 @@ +// Test that clang-misexpect can run over a compilation database + +// Now create a directory with a compilation database file and ensure we don't +// use it after failing to parse commands from the command 
line: +// +// RUN: mkdir -p %T/misexpect/ +// RUN: echo '[{"directory": "%/T/misexpect/","arguments": ["clang++","-O2","-c","%/T/misexpect/basic.cpp"], "file": "basic.cpp"},{"directory": "%/T/misexpect/","command": "clang -c %/T/misexpect/clean.c", "file": "%/T/misexpect/clean.c"}]' > %T/misexpect/compile_commands.json +// RUN: cat %s > %T/misexpect/basic.cpp +// RUN: cp %S/Inputs/clean.c %T/misexpect/clean.c +// RUN: llvm-profdata merge %S/Inputs/basic.proftext -o %t.profdata +// RUN: clang-misexpect --profile-dir=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=clang +// RUN: not clang-misexpect --profile-dir=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=llvm +// RUN: %run_clang_misexpect -profile-path=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=clang +// RUN: not %run_clang_misexpect -profile-path=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=sample + +// CHECK: basic.cpp:35 warning: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions. +// CHECK-NEXT: basic.cpp:35 remark: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions. + +// CHECK-NOT: clean.c:13 warning: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions. +// CHECK-NOT: clean.c:13 remark: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions. 
+ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +int foo(int); +int baz(int); +int buzz(); + +const int inner_loop = 100; +const int outer_loop = 2000; + +int bar() { + int rando = buzz(); + int x = 0; + if (likely(rando % (outer_loop * inner_loop) == 0)) { + x = baz(rando); + } else { + x = foo(50); + } + return x; +} diff --git a/clang-tools-extra/test/lit.cfg.py b/clang-tools-extra/test/lit.cfg.py --- a/clang-tools-extra/test/lit.cfg.py +++ b/clang-tools-extra/test/lit.cfg.py @@ -144,6 +144,12 @@ ('%run_clang_tidy', '%s %s' % (python_exec, run_clang_tidy)) ) +run_clang_misexpect = os.path.join( + config.test_source_root, "..", "clang-misexpect", "tool", "run-clang-misexpect.py") +config.substitutions.append( + ('%run_clang_misexpect', + '%s %s' % (python_exec, run_clang_misexpect)) ) + clangd_benchmarks_dir = os.path.join(os.path.dirname(config.clang_tools_dir), "tools", "clang", "tools", "extra", "clangd", "benchmarks") diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -207,6 +207,7 @@ LTO clang-apply-replacements clang-doc + clang-misexpect clang-format clang-resource-headers clang-include-fixer diff --git a/clang/test/Profile/misexpect-switch-nonconst.c b/clang/test/Profile/misexpect-switch-nonconst.c --- a/clang/test/Profile/misexpect-switch-nonconst.c +++ b/clang/test/Profile/misexpect-switch-nonconst.c @@ -1,7 +1,7 @@ // Test that misexpect emits no warning when switch condition is non-const // RUN: llvm-profdata merge %S/Inputs/misexpect-switch-nonconst.proftext -o %t.profdata -// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify +// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect // expected-no-diagnostics int sum(int *buff, int size); 
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp --- a/llvm/lib/Transforms/Utils/MisExpect.cpp +++ b/llvm/lib/Transforms/Utils/MisExpect.cpp @@ -43,7 +43,8 @@ static cl::opt<bool> PGOWarnMisExpect( "pgo-warn-misexpect", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " - "warnings about incorrect usage of llvm.expect intrinsics.")); + "warnings about incorrect usage of llvm.expect intrinsics."), + cl::ZeroOrMore); } // namespace llvm