Index: clang/lib/Tooling/JSONCompilationDatabase.cpp =================================================================== --- clang/lib/Tooling/JSONCompilationDatabase.cpp +++ clang/lib/Tooling/JSONCompilationDatabase.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" @@ -49,9 +50,9 @@ /// Assumes \-escaping for quoted arguments (see the documentation of /// unescapeCommandLine(...)). class CommandLineArgumentParser { - public: +public: CommandLineArgumentParser(StringRef CommandLine) - : Input(CommandLine), Position(Input.begin()-1) {} + : Input(CommandLine), Position(Input.begin() - 1) {} std::vector parse() { bool HasMoreInput = true; @@ -63,46 +64,56 @@ return CommandLine; } - private: +private: // All private methods return true if there is more input available. bool parseStringInto(std::string &String) { do { if (*Position == '"') { - if (!parseDoubleQuotedStringInto(String)) return false; + if (!parseDoubleQuotedStringInto(String)) + return false; } else if (*Position == '\'') { - if (!parseSingleQuotedStringInto(String)) return false; + if (!parseSingleQuotedStringInto(String)) + return false; } else { - if (!parseFreeStringInto(String)) return false; + if (!parseFreeStringInto(String)) + return false; } } while (*Position != ' '); return true; } bool parseDoubleQuotedStringInto(std::string &String) { - if (!next()) return false; + if (!next()) + return false; while (*Position != '"') { - if (!skipEscapeCharacter()) return false; + if (!skipEscapeCharacter()) + return false; String.push_back(*Position); - if (!next()) return false; + if (!next()) + return false; } return next(); } bool parseSingleQuotedStringInto(std::string &String) { - if (!next()) return false; + if (!next()) + return false; while (*Position != '\'') { String.push_back(*Position); - if (!next()) return false; + if (!next()) + return false; } return next(); } bool parseFreeStringInto(std::string &String) { do { - if (!skipEscapeCharacter()) return false; + if (!skipEscapeCharacter()) + return false; String.push_back(*Position); - if (!next()) return false; + if (!next()) + return false; } while (*Position != ' ' && *Position != '"' && *Position != '\''); return true; } @@ -116,7 +127,8 @@ bool nextNonWhitespace() { do { - if (!next()) return false; + if (!next()) + return false; } while (*Position == ' '); return true; } @@ -158,6 +170,124 @@ return parser.parse(); } +bool expandResponseFile(llvm::StringRef FName, llvm::StringSaver &Saver, + llvm::cl::TokenizerCallback Tokenizer, + SmallVectorImpl &NewArgv) { + llvm::ErrorOr> MemBufOrErr = + llvm::MemoryBuffer::getFile(FName); + if (!MemBufOrErr) + return false; + llvm::MemoryBuffer &MemBuf = *MemBufOrErr.get(); + StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); + + // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. + ArrayRef BufRef(MemBuf.getBufferStart(), MemBuf.getBufferEnd()); + std::string UTF8Buf; + // It is called byte order marker but the UTF-8 BOM is actually not affected + // by the host system's endianness. + auto HasUtF8ByteOrderMark = [](ArrayRef S) { + return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && + S[2] == '\xbf'); + }; + if (llvm::hasUTF16ByteOrderMark(BufRef)) { + if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) + return false; + Str = StringRef(UTF8Buf); + } + // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove + // these bytes before parsing. + // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark + else if (HasUtF8ByteOrderMark(BufRef)) + Str = StringRef(BufRef.data() + 3, BufRef.size() - 3); + // Tokenize the contents into NewArgv. + Tokenizer(Str, Saver, NewArgv, false); + return true; +} + +bool expandResponseFiles(tooling::CompileCommand &Cmd, + llvm::cl::TokenizerCallback Tokenizer) { + bool AllExpanded = true; + struct ResponseFileRecord { + llvm::StringRef File; + size_t End; + }; + std::vector &Argv = Cmd.CommandLine; + // To detect recursive response files, we maintain a stack of files and the + // position of the last argument in the file. This position is updated + // dynamically as we recursively expand files. + SmallVector FileStack; + + // Push a dummy entry that represents the initial command line, removing + // the need to check for an empty list. + FileStack.push_back({"", Argv.size()}); + + // Don't cache Argv.size() because it can change. + for (unsigned I = 0; I != Argv.size();) { + while (I == FileStack.back().End) { + // Passing the end of a file's argument list, so we can remove it from the + // stack. + FileStack.pop_back(); + } + + std::string &Arg = Argv[I]; + + if (Arg[0] != '@') { + ++I; + continue; + } + SmallString<128> ResponseFile; + if (llvm::sys::path::is_relative(&Arg[1])) { + llvm::sys::path::append(ResponseFile, Cmd.Directory, &Arg[1]); + } + llvm::sys::path::remove_dots(ResponseFile); + + auto IsEquivalent = [ResponseFile](const ResponseFileRecord &RFile) { + return llvm::sys::fs::equivalent(RFile.File, ResponseFile); + }; + + // Check for recursive response files. + if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) { + // This file is recursive, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + // Replace this response file argument with the tokenization of its + // contents. Nested response files are expanded in subsequent iterations. + SmallVector ExpandedArgv; + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver(Alloc); + llvm::SmallVector T; + if (!expandResponseFile(ResponseFile, Saver, Tokenizer, ExpandedArgv)) { + // We couldn't read this file, so we leave it in the argument stream and + // move on. + AllExpanded = false; + ++I; + continue; + } + + for (ResponseFileRecord &Record : FileStack) { + // Increase the end of all active records by the number of newly expanded + // arguments, minus the response file itself. + Record.End += ExpandedArgv.size() - 1; + } + + FileStack.push_back({ResponseFile, I + ExpandedArgv.size()}); + Argv.erase(Argv.begin() + I); + Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end()); + } + + // If successful, the top of the file stack will mark the end of the Argv + // stream. A failure here indicates a bug in the stack popping logic above. + // Note that FileStack may have more than one element at this point because we + // don't have a chance to pop the stack when encountering recursive files at + // the end of the stream, so seeing that doesn't indicate a bug. + assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End); + return AllExpanded; +} + // This plugin locates a nearby compile_command.json file, and also infers // compile commands for files not present in the database. class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { @@ -178,7 +308,8 @@ // Register the JSONCompilationDatabasePlugin with the // CompilationDatabasePluginRegistry using this statically initialized variable. static CompilationDatabasePluginRegistry::Add -X("json-compilation-database", "Reads JSON formatted compilation databases"); + X("json-compilation-database", + "Reads JSON formatted compilation databases"); namespace clang { namespace tooling { @@ -241,8 +372,7 @@ return Commands; } -std::vector -JSONCompilationDatabase::getAllFiles() const { +std::vector JSONCompilationDatabase::getAllFiles() const { std::vector Result; for (const auto &CommandRef : IndexByFile) Result.push_back(CommandRef.first().str()); @@ -313,16 +443,21 @@ void JSONCompilationDatabase::getCommands( ArrayRef CommandsRef, std::vector &Commands) const { + auto Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows() + ? llvm::cl::TokenizeWindowsCommandLine + : llvm::cl::TokenizeGNUCommandLine; for (const auto &CommandRef : CommandsRef) { SmallString<8> DirectoryStorage; SmallString<32> FilenameStorage; SmallString<32> OutputStorage; auto Output = std::get<3>(CommandRef); - Commands.emplace_back( - std::get<0>(CommandRef)->getValue(DirectoryStorage), - std::get<1>(CommandRef)->getValue(FilenameStorage), - nodeToCommandLine(Syntax, std::get<2>(CommandRef)), - Output ? Output->getValue(OutputStorage) : ""); + CompileCommand Cmd(std::get<0>(CommandRef)->getValue(DirectoryStorage), + std::get<1>(CommandRef)->getValue(FilenameStorage), + nodeToCommandLine(Syntax, std::get<2>(CommandRef)), + Output ? Output->getValue(OutputStorage) : ""); + // ExpandResponseFile + expandResponseFiles(Cmd, Tokenizer); + Commands.push_back(std::move(Cmd)); } } @@ -352,7 +487,7 @@ llvm::Optional> Command; llvm::yaml::ScalarNode *File = nullptr; llvm::yaml::ScalarNode *Output = nullptr; - for (auto& NextKeyValue : *Object) { + for (auto &NextKeyValue : *Object) { auto *KeyString = dyn_cast(NextKeyValue.getKey()); if (!KeyString) { ErrorMessage = "Expected strings as key."; @@ -394,8 +529,8 @@ } else if (KeyValue == "output") { Output = ValueString; } else { - ErrorMessage = ("Unknown key: \"" + - KeyString->getRawValue() + "\"").str(); + ErrorMessage = + ("Unknown key: \"" + KeyString->getRawValue() + "\"").str(); return false; } } @@ -416,10 +551,9 @@ SmallString<128> NativeFilePath; if (llvm::sys::path::is_relative(FileName)) { SmallString<8> DirectoryStorage; - SmallString<128> AbsolutePath( - Directory->getValue(DirectoryStorage)); + SmallString<128> AbsolutePath(Directory->getValue(DirectoryStorage)); llvm::sys::path::append(AbsolutePath, FileName); - llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true); + llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true); llvm::sys::path::native(AbsolutePath, NativeFilePath); } else { llvm::sys::path::native(FileName, NativeFilePath);