Index: clang/include/clang/Tooling/CompilationDatabase.h =================================================================== --- clang/include/clang/Tooling/CompilationDatabase.h +++ clang/include/clang/Tooling/CompilationDatabase.h @@ -31,6 +31,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/CommandLine.h" #include #include #include @@ -219,6 +220,15 @@ std::unique_ptr inferTargetAndDriverMode(std::unique_ptr Base); +/// Returns a wrapped CompilationDatabase that will expand all response files on +/// the command line returned by the underlying database. +/// +/// \param [in] Tokenizer Tokenization strategy. Typically Unix, Windows or +/// nullptr. Use nullptr to auto-detect from the process triple. +std::unique_ptr +expandResponseFiles(std::unique_ptr Base, + llvm::cl::TokenizerCallback Tokenizer = nullptr); + } // namespace tooling } // namespace clang Index: clang/lib/Tooling/CMakeLists.txt =================================================================== --- clang/lib/Tooling/CMakeLists.txt +++ clang/lib/Tooling/CMakeLists.txt @@ -17,6 +17,7 @@ CommonOptionsParser.cpp CompilationDatabase.cpp Execution.cpp + ExpandResponseFilesCompilationDatabase.cpp FileMatchTrie.cpp FixIt.cpp GuessTargetAndModeCompilationDatabase.cpp Index: clang/lib/Tooling/CompilationDatabase.cpp =================================================================== --- clang/lib/Tooling/CompilationDatabase.cpp +++ clang/lib/Tooling/CompilationDatabase.cpp @@ -399,7 +399,9 @@ loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override { SmallString<1024> DatabasePath(Directory); llvm::sys::path::append(DatabasePath, "compile_flags.txt"); - return FixedCompilationDatabase::loadFromFile(DatabasePath, ErrorMessage); + auto Base = + FixedCompilationDatabase::loadFromFile(DatabasePath, ErrorMessage); + return Base ? 
expandResponseFiles(std::move(Base)) : nullptr; } }; Index: clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp =================================================================== --- /dev/null +++ clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp @@ -0,0 +1,192 @@ +//===- ExpandResponseFilesCompilationDatabase.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace tooling { +namespace { + +bool expandResponseFile(llvm::StringRef FName, llvm::StringSaver &Saver, + llvm::cl::TokenizerCallback Tokenizer, + SmallVectorImpl &NewArgv) { + llvm::ErrorOr> MemBufOrErr = + llvm::MemoryBuffer::getFile(FName); + if (!MemBufOrErr) + return false; + llvm::MemoryBuffer &MemBuf = *MemBufOrErr.get(); + StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); + + // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. + ArrayRef BufRef(MemBuf.getBufferStart(), MemBuf.getBufferEnd()); + std::string UTF8Buf; + // It is called byte order marker but the UTF-8 BOM is actually not affected + // by the host system's endianness. 
+ auto HasUtF8ByteOrderMark = [](ArrayRef S) { + return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && + S[2] == '\xbf'); + }; + if (llvm::hasUTF16ByteOrderMark(BufRef)) { + if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) + return false; + Str = StringRef(UTF8Buf); + } + // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove + // these bytes before parsing. + // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark + else if (HasUtF8ByteOrderMark(BufRef)) + Str = StringRef(BufRef.data() + 3, BufRef.size() - 3); + // Tokenize the contents into NewArgv. + Tokenizer(Str, Saver, NewArgv, false); + return true; +} + +bool expandResponseFiles(tooling::CompileCommand &Cmd, + llvm::cl::TokenizerCallback Tokenizer) { + bool AllExpanded = true; + struct ResponseFileRecord { + llvm::SmallString<128> File; + size_t End; + }; + std::vector &Argv = Cmd.CommandLine; + // To detect recursive response files, we maintain a stack of files and the + // position of the last argument in the file. This position is updated + // dynamically as we recursively expand files. + SmallVector FileStack; + + // Push a dummy entry that represents the initial command line, removing + // the need to check for an empty list. + FileStack.push_back({llvm::StringRef(""), Argv.size()}); + + // Don't cache Argv.size() because it can change. + for (unsigned I = 0; I != Argv.size();) { + while (I == FileStack.back().End) { + // Passing the end of a file's argument list, so we can remove it from the + // stack. 
+ FileStack.pop_back(); + } + + std::string &Arg = Argv[I]; + + if (Arg[0] != '@') { + ++I; + continue; + } + SmallString<128> ResponseFile; + if (llvm::sys::path::is_relative(&Arg[1])) { + llvm::sys::path::append(ResponseFile, Cmd.Directory, &Arg[1]); + } else { + ResponseFile.append(Arg.begin() + 1, Arg.end()); + } + llvm::sys::path::remove_dots(ResponseFile, true); + llvm::sys::path::native(ResponseFile); + + auto IsEquivalent = [ResponseFile](const ResponseFileRecord &RFile) { + return llvm::sys::fs::equivalent(RFile.File, ResponseFile); + }; + + // Check for recursive response files. + if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) { + // This file is recursive, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + // Replace this response file argument with the tokenization of its + // contents. Nested response files are expanded in subsequent iterations. + SmallVector ExpandedArgv; + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver(Alloc); + llvm::SmallVector T; + if (!expandResponseFile(ResponseFile, Saver, Tokenizer, ExpandedArgv)) { + // We couldn't read this file, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + for (ResponseFileRecord &Record : FileStack) { + // Increase the end of all active records by the number of newly expanded + // arguments, minus the response file itself. + Record.End += ExpandedArgv.size() - 1; + } + + FileStack.push_back({ResponseFile, I + ExpandedArgv.size()}); + Argv.erase(Argv.begin() + I); + Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end()); + } + + // If successful, the top of the file stack will mark the end of the Argv + // stream. A failure here indicates a bug in the stack popping logic above. 
+ // Note that FileStack may have more than one element at this point because we + // don't have a chance to pop the stack when encountering recursive files at + // the end of the stream, so seeing that doesn't indicate a bug. + assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End); + return AllExpanded; +} + +class ExpandResponseFilesDatabase : public CompilationDatabase { +public: + ExpandResponseFilesDatabase(std::unique_ptr Base, + llvm::cl::TokenizerCallback Tokenizer) + : Base(std::move(Base)), Tokenizer(Tokenizer) { + assert(this->Base != nullptr); + } + + std::vector getAllFiles() const override { + return Base->getAllFiles(); + } + + std::vector + getCompileCommands(StringRef FilePath) const override { + return expand(Base->getCompileCommands(FilePath)); + } + + std::vector getAllCompileCommands() const override { + return expand(Base->getAllCompileCommands()); + } + +private: + std::vector expand(std::vector Cmds) const { + for (auto &Cmd : Cmds) { + expandResponseFiles(Cmd, Tokenizer); + } + return Cmds; + } + +private: + std::unique_ptr Base; + llvm::cl::TokenizerCallback Tokenizer; +}; + +} // namespace + +std::unique_ptr +expandResponseFiles(std::unique_ptr Base, + llvm::cl::TokenizerCallback Tokenizer) { + if (Tokenizer == nullptr) { + Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows() + ? llvm::cl::TokenizeWindowsCommandLine + : llvm::cl::TokenizeGNUCommandLine; + } + return std::make_unique(std::move(Base), + Tokenizer); +} + +} // namespace tooling +} // namespace clang Index: clang/lib/Tooling/JSONCompilationDatabase.cpp =================================================================== --- clang/lib/Tooling/JSONCompilationDatabase.cpp +++ clang/lib/Tooling/JSONCompilationDatabase.cpp @@ -167,8 +167,8 @@ llvm::sys::path::append(JSONDatabasePath, "compile_commands.json"); auto Base = JSONCompilationDatabase::loadFromFile( JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect); - return Base ? 
inferTargetAndDriverMode( - inferMissingCompileCommands(std::move(Base))) + return Base ? inferTargetAndDriverMode(inferMissingCompileCommands( + expandResponseFiles(std::move(Base)))) : nullptr; } }; Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp =================================================================== --- clang/unittests/Tooling/CompilationDatabaseTest.cpp +++ clang/unittests/Tooling/CompilationDatabaseTest.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/TargetSelect.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace tooling { @@ -859,5 +860,76 @@ "clang++ --driver-mode=g++ bar.cpp -D bar.cpp"); } +class ExpandResponseFilesTest : public MemDBTest { +protected: + void SetUp() override { + InnerDir = path(StringRef("inner")); + std::error_code EC = llvm::sys::fs::create_directory(InnerDir); + EXPECT_TRUE(!EC); + + llvm::sys::path::append(RspFileName1, InnerDir, "rsp1.rsp"); + std::ofstream RspFile1(RspFileName1.c_str()); + RspFile1 << "-Dflag1"; + RspFile1.close(); + + RspFileName2 = path(StringRef("rsp2.rsp")); + std::ofstream RspFile2(RspFileName2.c_str()); + RspFile2 << "-Dflag2 @rsp3.rsp"; + RspFile2.close(); + + RspFileName3 = path(StringRef("rsp3.rsp")); + std::ofstream RspFile3(RspFileName3.c_str()); + RspFile3 << "-Dflag3"; + RspFile3.close(); + + RspFileName4 = path(StringRef("rsp4.rsp")); + std::ofstream RspFile4(RspFileName4.c_str()); + RspFile4 << "-Dflag4 @rsp4.rsp"; + RspFile4.close(); + + llvm::sys::path::append(RspFileName5, InnerDir, "rsp5.rsp"); + std::ofstream RspFile5(RspFileName5.c_str()); + RspFile5 << "-Dflag5 @inner/rsp1.rsp"; + RspFile5.close(); + } + + void TearDown() override { + llvm::sys::fs::remove(RspFileName5); + llvm::sys::fs::remove(RspFileName4); + llvm::sys::fs::remove(RspFileName3); + llvm::sys::fs::remove(RspFileName2); + llvm::sys::fs::remove(RspFileName1); + llvm::sys::fs::remove(InnerDir); + } + + std::string getCommand(llvm::StringRef F) { + auto Results = 
expandResponseFiles(std::make_unique(Entries)) + ->getCompileCommands(path(F)); + if (Results.empty()) { + return "none"; + } + return llvm::join(Results[0].CommandLine, " "); + } + + SmallString<128> InnerDir; + SmallString<128> RspFileName1; + SmallString<128> RspFileName2; + SmallString<128> RspFileName3; + SmallString<128> RspFileName4; + SmallString<128> RspFileName5; +}; + +TEST_F(ExpandResponseFilesTest, ExpandResponseFiles) { + // clang-format off + add("foo.cpp", "clang", + ("@inner/rsp1.rsp @rsp2.rsp @rsp4.rsp " + "@" + RspFileName1 + " @inner/rsp5.rsp @rsp6.rsp") + .str()); + // clang-format on + EXPECT_EQ(getCommand("foo.cpp"), "clang foo.cpp -D foo.cpp -Dflag1 -Dflag2 " + "-Dflag3 -Dflag4 @rsp4.rsp -Dflag1 " + "-Dflag5 -Dflag1 @rsp6.rsp"); +} + } // end namespace tooling } // end namespace clang