Index: clang-tools-extra/trunk/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/CMakeLists.txt @@ -64,6 +64,7 @@ RIFF.cpp Selection.cpp SourceCode.cpp + QueryDriverDatabase.cpp Threading.cpp Trace.cpp TUScheduler.cpp Index: clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp =================================================================== --- clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp +++ clang-tools-extra/trunk/clangd/ClangdLSPServer.cpp @@ -9,12 +9,14 @@ #include "ClangdLSPServer.h" #include "Diagnostics.h" #include "FormattedString.h" +#include "GlobalCompilationDatabase.h" #include "Protocol.h" #include "SourceCode.h" #include "Trace.h" #include "URI.h" #include "refactor/Tweak.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Errc.h" @@ -335,9 +337,13 @@ ErrorCode::InvalidRequest)); if (const auto &Dir = Params.initializationOptions.compilationDatabasePath) CompileCommandsDir = Dir; - if (UseDirBasedCDB) + if (UseDirBasedCDB) { BaseCDB = llvm::make_unique( CompileCommandsDir); + BaseCDB = getQueryDriverDatabase( + llvm::makeArrayRef(ClangdServerOpts.QueryDriverGlobs), + std::move(BaseCDB)); + } CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, ClangdServerOpts.ResourceDir); Server.emplace(*CDB, FSProvider, static_cast(*this), Index: clang-tools-extra/trunk/clangd/ClangdServer.h =================================================================== --- clang-tools-extra/trunk/clangd/ClangdServer.h +++ clang-tools-extra/trunk/clangd/ClangdServer.h @@ -127,6 +127,10 @@ /// Enable hidden features mostly useful to clangd developers. /// e.g. tweaks to dump the AST. bool HiddenFeatures = false; + + /// Clangd will execute compiler drivers matching one of these globs to + /// fetch system include path. + std::vector QueryDriverGlobs; }; // Sensible default options for use in tests. // Features like indexing must be enabled if desired. Index: clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h =================================================================== --- clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h +++ clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.h @@ -91,6 +91,13 @@ llvm::Optional CompileCommandsDir; }; +/// Extracts system include search path from drivers matching QueryDriverGlobs +/// and adds them to the compile flags. Base may not be nullptr. +/// Returns Base when \p QueryDriverGlobs is empty. +std::unique_ptr +getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, + std::unique_ptr Base); + /// Wraps another compilation database, and supports overriding the commands /// using an in-memory mapping. class OverlayCDB : public GlobalCompilationDatabase { Index: clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp =================================================================== --- clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp +++ clang-tools-extra/trunk/clangd/QueryDriverDatabase.cpp @@ -0,0 +1,265 @@ +//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Some compiler drivers have implicit search mechanism for system headers. +// This compilation database implementation tries to extract that information by +// executing the driver in verbose mode. gcc-compatible drivers print something +// like: +// .... +// .... +// #include <...> search starts here: +// /usr/lib/gcc/x86_64-linux-gnu/7/include +// /usr/local/include +// /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed +// /usr/include/x86_64-linux-gnu +// /usr/include +// End of search list. +// .... +// .... +// This component parses that output and adds each path to command line args +// provided by Base, after prepending them with -isystem. Therefore current +// implementation would not work with a driver that is not gcc-compatible. +// +// First argument of the command line received from underlying compilation +// database is used as compiler driver path. Due to this arbitrary binary +// execution, this mechanism is not used by default and only executes binaries +// in the paths that are explicitly whitelisted by the user. + +#include "GlobalCompilationDatabase.h" +#include "Logger.h" +#include "Path.h" +#include "Trace.h" +#include "clang/Driver/Types.h" +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/ScopedPrinter.h" +#include +#include +#include + +namespace clang { +namespace clangd { +namespace { + +std::vector parseDriverOutput(llvm::StringRef Output) { + std::vector SystemIncludes; + constexpr char const *SIS = "#include <...> search starts here:"; + constexpr char const *SIE = "End of search list."; + llvm::SmallVector Lines; + Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + auto StartIt = std::find(Lines.begin(), Lines.end(), SIS); + if (StartIt == Lines.end()) { + elog("System include extraction: start marker not found: {0}", Output); + return {}; + } + ++StartIt; + const auto EndIt = std::find(StartIt, Lines.end(), SIE); + if (EndIt == Lines.end()) { + elog("System include extraction: end marker missing: {0}", Output); + return {}; + } + + for (llvm::StringRef Line : llvm::make_range(StartIt, EndIt)) { + SystemIncludes.push_back(Line.str()); + vlog("System include extraction: adding {0}", Line); + } + return SystemIncludes; +} + +std::vector extractSystemIncludes(PathRef Driver, + llvm::StringRef Ext, + llvm::Regex &QueryDriverRegex) { + trace::Span Tracer("Extract system includes"); + SPAN_ATTACH(Tracer, "driver", Driver); + SPAN_ATTACH(Tracer, "ext", Ext); + + if (!QueryDriverRegex.match(Driver)) { + vlog("System include extraction: not whitelisted driver {0}", Driver); + return {}; + } + + if (!llvm::sys::fs::exists(Driver)) { + elog("System include extraction: {0} does not exist.", Driver); + return {}; + } + if (!llvm::sys::fs::can_execute(Driver)) { + elog("System include extraction: {0} is not executable.", Driver); + return {}; + } + + llvm::SmallString<128> OutputPath; + auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd", + OutputPath); + if (EC) { + elog("System include extraction: failed to create temporary file with " + "error {0}", + EC.message()); + return {}; + } + auto CleanUp = llvm::make_scope_exit( + [&OutputPath]() { llvm::sys::fs::remove(OutputPath); }); + + llvm::Optional Redirects[] = { + {""}, llvm::StringRef(OutputPath), {""}}; + + auto Type = driver::types::lookupTypeForExtension(Ext); + if (Type == driver::types::TY_INVALID) { + elog("System include extraction: invalid file type for {0}", Ext); + return {}; + } + // Should we also preserve flags like "-sysroot", "-nostdinc" ? + const llvm::StringRef Args[] = {"-E", "-x", driver::types::getTypeName(Type), + "-", "-v"}; + + int RC = + llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, Redirects); + if (RC) { + elog("System include extraction: driver execution failed with return code: " + "{0}", + llvm::to_string(RC)); + return {}; + } + + auto BufOrError = llvm::MemoryBuffer::getFile(OutputPath); + if (!BufOrError) { + elog("System include extraction: failed to read {0} with error {1}", + OutputPath, BufOrError.getError().message()); + return {}; + } + + auto Includes = parseDriverOutput(BufOrError->get()->getBuffer()); + log("System include extractor: succesfully executed {0}, got includes: " + "\"{1}\"", + Driver, llvm::join(Includes, ", ")); + return Includes; +} + +tooling::CompileCommand & +addSystemIncludes(tooling::CompileCommand &Cmd, + llvm::ArrayRef SystemIncludes) { + for (llvm::StringRef Include : SystemIncludes) { + // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl" + Cmd.CommandLine.push_back("-isystem"); + Cmd.CommandLine.push_back(Include.str()); + } + return Cmd; +} + +/// Converts a glob containing only ** or * into a regex. +std::string convertGlobToRegex(llvm::StringRef Glob) { + std::string RegText; + llvm::raw_string_ostream RegStream(RegText); + RegStream << '^'; + for (size_t I = 0, E = Glob.size(); I < E; ++I) { + if (Glob[I] == '*') { + if (I + 1 < E && Glob[I + 1] == '*') { + // Double star, accept any sequence. + RegStream << ".*"; + // Also skip the second star. + ++I; + } else { + // Single star, accept any sequence without a slash. + RegStream << "[^/]*"; + } + } else { + RegStream << llvm::Regex::escape(Glob.substr(I, 1)); + } + } + RegStream << '$'; + RegStream.flush(); + return RegText; +} + +/// Converts a glob containing only ** or * into a regex. +llvm::Regex convertGlobsToRegex(llvm::ArrayRef Globs) { + assert(!Globs.empty() && "Globs cannot be empty!"); + std::vector RegTexts; + RegTexts.reserve(Globs.size()); + for (llvm::StringRef Glob : Globs) + RegTexts.push_back(convertGlobToRegex(Glob)); + + llvm::Regex Reg(llvm::join(RegTexts, "|")); + assert(Reg.isValid(RegTexts.front()) && + "Created an invalid regex from globs"); + return Reg; +} + +/// Extracts system includes from a trusted driver by parsing the output of +/// include search path and appends them to the commands coming from underlying +/// compilation database. +class QueryDriverDatabase : public GlobalCompilationDatabase { +public: + QueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, + std::unique_ptr Base) + : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)), + Base(std::move(Base)) { + assert(this->Base); + BaseChanged = + this->Base->watch([this](const std::vector &Changes) { + OnCommandChanged.broadcast(Changes); + }); + } + + llvm::Optional + getCompileCommand(PathRef File, ProjectInfo *PI = nullptr) const override { + auto Cmd = Base->getCompileCommand(File, PI); + if (!Cmd || Cmd->CommandLine.empty()) + return Cmd; + + llvm::SmallString<128> Driver(Cmd->CommandLine.front()); + llvm::sys::fs::make_absolute(Cmd->Directory, Driver); + + llvm::ArrayRef SystemIncludes; + { + std::lock_guard Lock(Mu); + + llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.'); + auto It = DriverToIncludesCache.try_emplace({Driver, Ext}); + if (It.second) + It.first->second = extractSystemIncludes(Driver, Ext, QueryDriverRegex); + SystemIncludes = It.first->second; + } + + return addSystemIncludes(*Cmd, SystemIncludes); + } + +private: + mutable std::mutex Mu; + // Caches includes extracted from a driver. + mutable llvm::DenseMap, + std::vector> + DriverToIncludesCache; + mutable llvm::Regex QueryDriverRegex; + + std::unique_ptr Base; + CommandChanged::Subscription BaseChanged; +}; +} // namespace + +std::unique_ptr +getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, + std::unique_ptr Base) { + assert(Base && "Null base to SystemIncludeExtractor"); + if (QueryDriverGlobs.empty()) + return Base; + return llvm::make_unique(QueryDriverGlobs, + std::move(Base)); +} + +} // namespace clangd +} // namespace clang Index: clang-tools-extra/trunk/clangd/test/system-include-extractor.test =================================================================== --- clang-tools-extra/trunk/clangd/test/system-include-extractor.test +++ clang-tools-extra/trunk/clangd/test/system-include-extractor.test @@ -0,0 +1,50 @@ +# RUN: rm -rf %t.dir && mkdir -p %t.dir + +# Generate a mock-driver that will print %temp_dir%/my/dir and +# %temp_dir%/my/dir2 as include search paths. +# RUN: echo '#!/bin/bash' >> %t.dir/my_driver.sh +# RUN: echo 'echo line to ignore' >> %t.dir/my_driver.sh +# RUN: echo 'echo \#include \<...\> search starts here:' >> %t.dir/my_driver.sh +# RUN: echo 'echo %t.dir/my/dir/' >> %t.dir/my_driver.sh +# RUN: echo 'echo %t.dir/my/dir2/' >> %t.dir/my_driver.sh +# RUN: echo 'echo End of search list.' >> %t.dir/my_driver.sh +# RUN: chmod +x %t.dir/my_driver.sh + +# Create header files my/dir/a.h and my/dir2/b.h +# RUN: mkdir -p %t.dir/my/dir +# RUN: touch %t.dir/my/dir/a.h +# RUN: mkdir -p %t.dir/my/dir2 +# RUN: touch %t.dir/my/dir2/b.h + +# Generate a compile_commands.json that will query the mock driver we've +# created. Which should add a.h and b.h into include search path. +# RUN: echo '[{"directory": "%/t.dir", "command": "%/t.dir/my_driver.sh the-file.cpp", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json + +# RUN: sed -e "s|INPUT_DIR|%/t.dir|g" %s > %t.test.1 +# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." +# (with the extra slash in the front), so we add it here. +# RUN: sed -e "s|file://\([A-Z]\):/|file:///\1:/|g" %t.test.1 > %t.test + +# Bless the mock driver we've just created so that clangd can execute it. +# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test | FileCheck -strict-whitespace %t.test +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{}} +--- +{ + "jsonrpc":"2.0", + "method":"textDocument/didOpen", + "params": { + "textDocument": { + "uri": "file://INPUT_DIR/the-file.cpp", + "languageId":"cpp", + "version":1, + "text":"#include \n#include " + } + } +} +# CHECK: "method": "textDocument/publishDiagnostics", +# CHECK-NEXT: "params": { +# CHECK-NEXT: "diagnostics": [], +--- +{"jsonrpc":"2.0","id":10000,"method":"shutdown"} +--- +{"jsonrpc":"2.0","method":"exit"} Index: clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp =================================================================== --- clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp +++ clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp @@ -269,6 +269,15 @@ llvm::cl::desc("Enable hidden features mostly useful to clangd developers"), llvm::cl::init(false), llvm::cl::Hidden); +static llvm::cl::list QueryDriverGlobs( + "query-driver", + llvm::cl::desc( + "Comma separated list of globs for white-listing gcc-compatible " + "drivers that are safe to execute. Drivers matching any of these globs " + "will be used to extract system includes. e.g. " + "/usr/bin/**/clang-*,/path/to/repo/**/g++-*"), + llvm::cl::CommaSeparated); + namespace { /// \brief Supports a test URI scheme with relaxed constraints for lit tests. @@ -523,6 +532,7 @@ }; } Opts.SuggestMissingIncludes = SuggestMissingIncludes; + Opts.QueryDriverGlobs = std::move(QueryDriverGlobs); llvm::Optional OffsetEncodingFromFlag; if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) OffsetEncodingFromFlag = ForceOffsetEncoding;