Index: clang/include/clang/Tooling/CompilationDatabase.h
===================================================================
--- clang/include/clang/Tooling/CompilationDatabase.h
+++ clang/include/clang/Tooling/CompilationDatabase.h
@@ -31,6 +31,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
 #include <memory>
 #include <string>
 #include <utility>
@@ -219,6 +220,15 @@
 std::unique_ptr<CompilationDatabase>
 inferTargetAndDriverMode(std::unique_ptr<CompilationDatabase> Base);
 
+/// Expand response files on a command line recursively using the given
+/// tokenization strategy.
+///
+/// \param [in,out] Cmd Command line into which to expand response files.
+/// \param [in] Tokenizer Tokenization strategy. Typically Unix or Windows.
+/// \return true if all @files were expanded successfully or there were none.
+bool expandResponseFiles(tooling::CompileCommand &Cmd,
+                         llvm::cl::TokenizerCallback Tokenizer);
+
 } // namespace tooling
 } // namespace clang
 
Index: clang/lib/Tooling/CompilationDatabase.cpp
===================================================================
--- clang/lib/Tooling/CompilationDatabase.cpp
+++ clang/lib/Tooling/CompilationDatabase.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/LineIterator.h"
@@ -382,6 +383,13 @@
   CompileCommands.emplace_back(Directory, StringRef(),
                                std::move(ToolCommandLine),
                                StringRef());
+  // Expand @response files using the host platform's quoting rules.
+  auto Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows()
+                       ? llvm::cl::TokenizeWindowsCommandLine
+                       : llvm::cl::TokenizeGNUCommandLine;
+  llvm::for_each(CompileCommands, [Tokenizer](CompileCommand &Cmd) {
+    expandResponseFiles(Cmd, Tokenizer);
+  });
 }
 
 std::vector<CompileCommand>
@@ -416,5 +424,135 @@
 extern volatile int JSONAnchorSource;
 static int LLVM_ATTRIBUTE_UNUSED JSONAnchorDest = JSONAnchorSource;
 
+/// Read the response file \p FName and append its tokens to \p NewArgv.
+///
+/// UTF-16 input (detected via byte order mark) is converted to UTF-8 and a
+/// leading UTF-8 byte order mark is skipped before tokenizing.
+///
+/// \param [in] FName Path of the response file to read.
+/// \param [in] Saver String saver handed to the tokenizer.
+/// \param [in] Tokenizer Tokenization strategy applied to the contents.
+/// \param [out] NewArgv Receives the tokens found in the file.
+/// \return false if the file could not be read or converted, true otherwise.
+static bool expandResponseFile(llvm::StringRef FName, llvm::StringSaver &Saver,
+                               llvm::cl::TokenizerCallback Tokenizer,
+                               SmallVectorImpl<const char *> &NewArgv) {
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MemBufOrErr =
+      llvm::MemoryBuffer::getFile(FName);
+  if (!MemBufOrErr)
+    return false;
+  llvm::MemoryBuffer &MemBuf = *MemBufOrErr.get();
+  StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize());
+
+  // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing.
+  ArrayRef<char> BufRef(MemBuf.getBufferStart(), MemBuf.getBufferEnd());
+  std::string UTF8Buf;
+  // It is called byte order marker but the UTF-8 BOM is actually not affected
+  // by the host system's endianness.
+  auto HasUTF8ByteOrderMark = [](ArrayRef<char> S) {
+    return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' &&
+            S[2] == '\xbf');
+  };
+  if (llvm::hasUTF16ByteOrderMark(BufRef)) {
+    if (!convertUTF16ToUTF8String(BufRef, UTF8Buf))
+      return false;
+    Str = StringRef(UTF8Buf);
+  }
+  // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove
+  // these bytes before parsing.
+  // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
+  else if (HasUTF8ByteOrderMark(BufRef))
+    Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
+  // Tokenize the contents into NewArgv.
+  Tokenizer(Str, Saver, NewArgv, /*MarkEOLs=*/false);
+  return true;
+}
+
+bool expandResponseFiles(tooling::CompileCommand &Cmd,
+                         llvm::cl::TokenizerCallback Tokenizer) {
+  bool AllExpanded = true;
+  struct ResponseFileRecord {
+    llvm::SmallString<128> File;
+    size_t End;
+  };
+  std::vector<std::string> &Argv = Cmd.CommandLine;
+  // To detect recursive response files, we maintain a stack of files and the
+  // position of the last argument in the file. This position is updated
+  // dynamically as we recursively expand files.
+  SmallVector<ResponseFileRecord, 3> FileStack;
+
+  // Push a dummy entry that represents the initial command line, removing
+  // the need to check for an empty list.
+  FileStack.push_back({llvm::SmallString<128>(""), Argv.size()});
+
+  // Don't cache Argv.size() because it can change.
+  for (size_t I = 0; I != Argv.size();) {
+    while (I == FileStack.back().End) {
+      // Passing the end of a file's argument list, so we can remove it from the
+      // stack.
+      FileStack.pop_back();
+    }
+
+    std::string &Arg = Argv[I];
+
+    if (Arg[0] != '@') {
+      ++I;
+      continue;
+    }
+    SmallString<128> ResponseFile;
+    if (llvm::sys::path::is_relative(&Arg[1])) {
+      llvm::sys::path::append(ResponseFile, Cmd.Directory, &Arg[1]);
+    } else {
+      ResponseFile.append(Arg.begin() + 1, Arg.end());
+    }
+    llvm::sys::path::remove_dots(ResponseFile, true);
+    llvm::sys::path::native(ResponseFile);
+
+    auto IsEquivalent = [&ResponseFile](const ResponseFileRecord &RFile) {
+      return llvm::sys::fs::equivalent(RFile.File, ResponseFile);
+    };
+
+    // Check for recursive response files.
+    if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) {
+      // This file is recursive, so we leave it in the argument stream and
+      // move on.
+      AllExpanded = false;
+      ++I;
+      continue;
+    }
+
+    // Replace this response file argument with the tokenization of its
+    // contents. Nested response files are expanded in subsequent iterations.
+    SmallVector<const char *, 0> ExpandedArgv;
+    llvm::BumpPtrAllocator Alloc;
+    llvm::StringSaver Saver(Alloc);
+    if (!expandResponseFile(ResponseFile, Saver, Tokenizer, ExpandedArgv)) {
+      // We couldn't read this file, so we leave it in the argument stream and
+      // move on.
+      AllExpanded = false;
+      ++I;
+      continue;
+    }
+
+    for (ResponseFileRecord &Record : FileStack) {
+      // Increase the end of all active records by the number of newly expanded
+      // arguments, minus the response file itself.
+      Record.End += ExpandedArgv.size() - 1;
+    }
+
+    FileStack.push_back({ResponseFile, I + ExpandedArgv.size()});
+    Argv.erase(Argv.begin() + I);
+    Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end());
+  }
+
+  // If successful, the top of the file stack will mark the end of the Argv
+  // stream. A failure here indicates a bug in the stack popping logic above.
+  // Note that FileStack may have more than one element at this point because we
+  // don't have a chance to pop the stack when encountering recursive files at
+  // the end of the stream, so seeing that doesn't indicate a bug.
+  assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End);
+  return AllExpanded;
+}
+
 } // namespace tooling
 } // namespace clang
Index: clang/lib/Tooling/JSONCompilationDatabase.cpp
===================================================================
--- clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -313,16 +313,29 @@
 void JSONCompilationDatabase::getCommands(
     ArrayRef<CompileCommandRef> CommandsRef,
     std::vector<CompileCommand> &Commands) const {
+  // Pick the tokenizer matching the configured command line syntax.
+  auto GetTokenizer = [](JSONCommandLineSyntax Syntax) {
+    if (Syntax == JSONCommandLineSyntax::AutoDetect) {
+      Syntax = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows()
+                   ? JSONCommandLineSyntax::Windows
+                   : JSONCommandLineSyntax::Gnu;
+    }
+    return Syntax == JSONCommandLineSyntax::Gnu
+               ? llvm::cl::TokenizeGNUCommandLine
+               : llvm::cl::TokenizeWindowsCommandLine;
+  };
+  auto Tokenizer = GetTokenizer(Syntax);
   for (const auto &CommandRef : CommandsRef) {
     SmallString<8> DirectoryStorage;
     SmallString<32> FilenameStorage;
     SmallString<32> OutputStorage;
     auto Output = std::get<3>(CommandRef);
-    Commands.emplace_back(
-        std::get<0>(CommandRef)->getValue(DirectoryStorage),
-        std::get<1>(CommandRef)->getValue(FilenameStorage),
-        nodeToCommandLine(Syntax, std::get<2>(CommandRef)),
-        Output ? Output->getValue(OutputStorage) : "");
+    CompileCommand Cmd(std::get<0>(CommandRef)->getValue(DirectoryStorage),
+                       std::get<1>(CommandRef)->getValue(FilenameStorage),
+                       nodeToCommandLine(Syntax, std::get<2>(CommandRef)),
+                       Output ? Output->getValue(OutputStorage) : "");
+    expandResponseFiles(Cmd, Tokenizer);
+    Commands.push_back(std::move(Cmd));
   }
 }
 
Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===================================================================
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <fstream>
 
 namespace clang {
 namespace tooling {
@@ -859,5 +860,138 @@
             "clang++ --driver-mode=g++ bar.cpp -D bar.cpp");
 }
 
+// Response files referenced from compile_commands.json are expanded relative
+// to the command's directory; recursive and missing files are left in place.
+TEST(ExpandResponseFileTest, JSONCompilationDatabase) {
+  SmallString<128> TestDir;
+  std::error_code EC =
+      llvm::sys::fs::createUniqueDirectory("unittest", TestDir);
+  ASSERT_TRUE(!EC);
+
+  SmallString<128> TestMainFileName;
+  llvm::sys::path::append(TestMainFileName, TestDir, "main.cpp");
+
+  SmallString<128> InnerDir;
+  llvm::sys::path::append(InnerDir, TestDir, "inner");
+  EC = llvm::sys::fs::create_directory(InnerDir);
+  ASSERT_TRUE(!EC);
+
+  SmallString<128> RspFileName1;
+  llvm::sys::path::append(RspFileName1, InnerDir, "rsp1.rsp");
+  std::ofstream RspFile1(RspFileName1.c_str());
+  RspFile1 << "-Dflag1";
+  RspFile1.close();
+
+  SmallString<128> RspFileName2;
+  llvm::sys::path::append(RspFileName2, TestDir, "rsp2.rsp");
+  std::ofstream RspFile2(RspFileName2.c_str());
+  RspFile2 << "-Dflag2 @rsp3.rsp";
+  RspFile2.close();
+
+  SmallString<128> RspFileName3;
+  llvm::sys::path::append(RspFileName3, TestDir, "rsp3.rsp");
+  std::ofstream RspFile3(RspFileName3.c_str());
+  RspFile3 << "-Dflag3";
+  RspFile3.close();
+
+  SmallString<128> RspFileName4;
+  llvm::sys::path::append(RspFileName4, TestDir, "rsp4.rsp");
+  std::ofstream RspFile4(RspFileName4.c_str());
+  RspFile4 << "-Dflag4 @rsp4.rsp";
+  RspFile4.close();
+
+  SmallString<128> RspFileName5;
+  llvm::sys::path::append(RspFileName5, InnerDir, "rsp5.rsp");
+  std::ofstream RspFile5(RspFileName5.c_str());
+  RspFile5 << "-Dflag5 @inner/rsp1.rsp";
+  RspFile5.close();
+
+  SmallString<128> CompileCommandsFileName;
+  llvm::sys::path::append(CompileCommandsFileName, TestDir,
+                          "compile_commands.json");
+  std::ofstream CompileCommandsFile(CompileCommandsFileName.c_str());
+  // clang-format off
+  CompileCommandsFile
+    << ("[{\"directory\": \"" + TestDir + "\","
+        "\"command\": \"clang @inner/rsp1.rsp @rsp2.rsp @rsp4.rsp "
+        "@" + RspFileName1 + " @inner/rsp5.rsp @rsp6.rsp\","
+        "\"file\": \"" + TestMainFileName + "\"}]").str();
+  // clang-format on
+  CompileCommandsFile.close();
+
+  std::string ErrorMessage;
+  auto JsonDatabase =
+      JSONCompilationDatabase::loadFromDirectory(TestDir, ErrorMessage);
+
+  ASSERT_TRUE(JsonDatabase) << ErrorMessage;
+  auto FoundCommand = JsonDatabase->getCompileCommands(TestMainFileName);
+
+  ASSERT_EQ(FoundCommand.size(), 1u) << ErrorMessage;
+  EXPECT_EQ(FoundCommand[0].Directory, TestDir) << ErrorMessage;
+  EXPECT_THAT(FoundCommand[0].CommandLine,
+              ElementsAre("clang", "-Dflag1", "-Dflag2", "-Dflag3", "-Dflag4",
+                          "@rsp4.rsp", "-Dflag1", "-Dflag5", "-Dflag1",
+                          "@rsp6.rsp"))
+      << ErrorMessage;
+  EXPECT_EQ(FoundCommand[0].Filename, TestMainFileName) << ErrorMessage;
+
+  llvm::sys::fs::remove(CompileCommandsFileName);
+  llvm::sys::fs::remove(RspFileName5);
+  llvm::sys::fs::remove(RspFileName4);
+  llvm::sys::fs::remove(RspFileName3);
+  llvm::sys::fs::remove(RspFileName2);
+  llvm::sys::fs::remove(RspFileName1);
+  llvm::sys::fs::remove(InnerDir);
+  llvm::sys::fs::remove(TestDir);
+}
+
+// Response files referenced from compile_flags.txt are expanded, including
+// nested ones.
+TEST(ExpandResponseFileTest, FixedCompilationDatabase) {
+  SmallString<128> TestDir;
+  std::error_code EC =
+      llvm::sys::fs::createUniqueDirectory("unittest", TestDir);
+  ASSERT_TRUE(!EC);
+
+  SmallString<128> TestMainFileName;
+  llvm::sys::path::append(TestMainFileName, TestDir, "main.cpp");
+
+  SmallString<128> RspFileName1;
+  llvm::sys::path::append(RspFileName1, TestDir, "rsp1.rsp");
+  std::ofstream RspFile1(RspFileName1.c_str());
+  RspFile1 << "-Dflag1 @rsp2.rsp";
+  RspFile1.close();
+
+  SmallString<128> RspFileName2;
+  llvm::sys::path::append(RspFileName2, TestDir, "rsp2.rsp");
+  std::ofstream RspFile2(RspFileName2.c_str());
+  RspFile2 << "-Dflag2";
+  RspFile2.close();
+
+  SmallString<128> FixedCompilationFileName;
+  llvm::sys::path::append(FixedCompilationFileName, TestDir,
+                          "compile_flags.txt");
+  std::ofstream CompileCommandsFile(FixedCompilationFileName.c_str());
+  CompileCommandsFile << "@rsp1.rsp";
+  CompileCommandsFile.close();
+  std::string ErrorMessage;
+  auto FixedDatabase =
+      FixedCompilationDatabase::loadFromDirectory(TestDir, ErrorMessage);
+
+  ASSERT_TRUE(FixedDatabase) << ErrorMessage;
+  auto FoundCommand = FixedDatabase->getCompileCommands(TestMainFileName);
+
+  ASSERT_EQ(FoundCommand.size(), 1u) << ErrorMessage;
+  EXPECT_THAT(FoundCommand[0].CommandLine,
+              ElementsAre(EndsWith("clang-tool"), "-Dflag1", "-Dflag2",
+                          EndsWith("main.cpp")))
+      << ErrorMessage;
+
+  llvm::sys::fs::remove(FixedCompilationFileName);
+  llvm::sys::fs::remove(RspFileName2);
+  llvm::sys::fs::remove(RspFileName1);
+  llvm::sys::fs::remove(TestDir);
+}
+
 } // end namespace tooling
 } // end namespace clang