Index: include/llvm/Option/OptTable.h
===================================================================
--- include/llvm/Option/OptTable.h
+++ include/llvm/Option/OptTable.h
@@ -15,6 +15,9 @@
 
 namespace llvm {
 class raw_ostream;
+namespace cl {
+class ExpandedArgs;
+}
 namespace opt {
 class Arg;
 class ArgList;
@@ -123,13 +126,18 @@
   ///     Zero is the default which includes all flags.
   /// \param [in] FlagsToExclude - Don't parse options with this flag. Zero
   ///     is the default and means exclude nothing.
+  /// \param [in] ResponseFilesInfo - Information about whether an argument
+  ///     comes from a response file, which can change the semantics of some
+  ///     arguments.
   ///
   /// \return The parsed argument, or 0 if the argument is missing values
   /// (in which case Index still points at the conceptual next argument string
   /// to parse).
-  Arg *ParseOneArg(const ArgList &Args, unsigned &Index,
-                   unsigned FlagsToInclude = 0,
-                   unsigned FlagsToExclude = 0) const;
+  Arg *
+  ParseOneArg(const ArgList &Args, unsigned &Index,
+              unsigned FlagsToInclude = 0,
+              unsigned FlagsToExclude = 0,
+              const llvm::cl::ExpandedArgs *ResponseFilesInfo = nullptr) const;
 
   /// \brief Parse an list of arguments into an InputArgList.
   ///
@@ -149,14 +157,18 @@
   ///     Zero is the default which includes all flags.
   /// \param FlagsToExclude - Don't parse options with this flag. Zero
   ///     is the default and means exclude nothing.
+  /// \param ResponseFilesInfo - Information about whether an argument comes
+  ///     from a response file, which can change the semantics of some arguments.
   /// \return An InputArgList; on error this will contain all the options
   /// which could be parsed.
-  InputArgList *ParseArgs(const char* const *ArgBegin,
-                          const char* const *ArgEnd,
-                          unsigned &MissingArgIndex,
-                          unsigned &MissingArgCount,
-                          unsigned FlagsToInclude = 0,
-                          unsigned FlagsToExclude = 0) const;
+  InputArgList *
+  ParseArgs(const char* const *ArgBegin,
+            const char* const *ArgEnd,
+            unsigned &MissingArgIndex,
+            unsigned &MissingArgCount,
+            unsigned FlagsToInclude = 0,
+            unsigned FlagsToExclude = 0,
+            const llvm::cl::ExpandedArgs *ResponseFilesInfo = nullptr) const;
 
   /// \brief Render the help text for an option table.
   ///
Index: include/llvm/Option/Option.h
===================================================================
--- include/llvm/Option/Option.h
+++ include/llvm/Option/Option.h
@@ -194,7 +194,12 @@
   /// \param ArgSize The number of bytes taken up by the matched Option prefix
   ///                and name. This is used to determine where joined values
   ///                start.
-  Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+  /// \param LastInResponseFile Index of the last argument this option may
+  ///                           consume: the last argument expanded from the
+  ///                           enclosing response file, or the last input
+  ///                           argument when there is no response file.
+  Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize,
+              unsigned LastInResponseFile) const;
 
   void dump() const;
 };
Index: include/llvm/Support/CommandLine.h
===================================================================
--- include/llvm/Support/CommandLine.h
+++ include/llvm/Support/CommandLine.h
@@ -20,6 +20,7 @@
 #ifndef LLVM_SUPPORT_COMMANDLINE_H
 #define LLVM_SUPPORT_COMMANDLINE_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
@@ -1806,21 +1807,199 @@
 void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver,
                                 SmallVectorImpl &NewArgv);
 
-/// \brief String tokenization function type. Should be compatible with either
-/// Windows or Unix command line tokenizers.
-typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver,
-                                  SmallVectorImpl &NewArgv);
-
-/// \brief Expand response files on a command line recursively using the given
-/// StringSaver and tokenization strategy. Argv should contain the command line
-/// before expansion and will be modified in place.
+/// \brief Encapsulates a reference to argv program arguments after expanding
+/// its '@file' arguments (response files).
+/// Does not own any args strings. String creation is made via a
+/// user-provided StringSaver reference.
+/// Maintains book-keeping information about which arguments were originally
+/// expanded from a response file. We need this information in case the
+/// semantics of some arguments depends on whether it was inside a response
+/// file or not.
 ///
-/// \param [in] Saver Delegates back to the caller for saving parsed strings.
-/// \param [in] Tokenizer Tokenization strategy. Typically Unix or Windows.
-/// \param [in,out] Argv Command line into which to expand response files.
-/// \return true if all @files were expanded successfully or there were none.
-bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
-                         SmallVectorImpl &Argv);
+/// Usage examples:
+///
+/// \code
+///   ExpandedArgs ExpArgs = ExpandedArgs::ExpandResponseFiles(
+///     ExpandedArgs::GNU, ArrayRef(argv, argc), &Saver);
+///   // get expanded argv
+///   const SmallVectorImpl &argv = ExpArgs.get();
+///   // insert a new argument before the first argument
+///   ExpArgs.insert(ExpArgs.begin(), ExpArgs.SaveString("-newarg"));
+///   // prints all arguments expanded from response files, a line per
+///   // response file.
+///   for (unsigned i = 0; i < argv.size();) {
+///     unsigned last = 0;
+///     if (!ExpArgs.lookupBoundaryByIndex(i, last)) {
+///       ++i;
+///       continue;
+///     }
+///     // 'last' is the inclusive index of the file's final argument.
+///     for (; i <= last; ++i)
+///       errs() << " " << argv[i];
+///     errs() << "\n";
+///   }
+/// \endcode
+class ExpandedArgs : public StringSaver {
+public:
+  // Encodes which tokenization strategy to use when expanding response
+  // files.
+  enum TokenizerType { GNU, Windows };
+  typedef SmallVector::iterator iterator;
+  typedef SmallVector::size_type size_type;
+
+private:
+  typedef SmallVector, 4> BoundaryVectorTy;
+
+  // The argv vector reference (the strings themselves live outside this
+  // class)
+  SmallVector Args;
+  // A vector of index pairs (first, last) indicating the boundaries of
+  // each response file (index of the first argument that comes from a response
+  // file expansion and index of the last one). Both ends are inclusive.
+  BoundaryVectorTy ResponseBoundaries;
+
+  // We save new expanded strings through this interface
+  StringSaver *StringStorage;
+  // Whether we had an error expanding response files
+  bool Error;
+
+  // Constructor: We need an array of string references, where each
+  // string corresponds to a command-line argument, and a StringSaver
+  // reference. We use StringSaver as an external object that manages our
+  // string allocation needs, obviating us from managing the string location.
+  // This brings an advantage: ExpandedArgs may be easily copied without
+  // changing string references, which would require us to update the entire
+  // args vector.
+  ExpandedArgs(ArrayRef _Args, StringSaver *_SS);
+  // Constructor that moves an existing SmallVector rvalue
+  // to our private args vector, saving a vector copy.
+  ExpandedArgs(SmallVector &&_Args, StringSaver *_SS);
+
+  // Expands a single response file whose name is given by FName. The
+  // command-line arguments contained in this file are copied to NewArgv.
+  bool ExpandResponseFile(const char *FName, TokenizerType Tokenizer,
+                          SmallVectorImpl &NewArgv);
+  // Scans our Args vector for @ arguments. If they refer to a valid file,
+  // expand them with the file contents. Tokenizer defines which tokenization
+  // strategy to use when parsing each response file.
+  void ExpandResponseFiles(TokenizerType Tokenizer);
+
+public:
+  // Indicates whether there was an error in the response file expansion
+  bool hasError() const { return Error; }
+
+  // NOTE(review): Other is a const rvalue reference, so the std::move calls
+  // in the out-of-line definition copy Args and ResponseBoundaries instead
+  // of moving them -- confirm whether the const is intended.
+  ExpandedArgs(const ExpandedArgs &&Other);
+  ExpandedArgs &operator=(ExpandedArgs &&RHS);
+
+  // Saves a string using our internal string storage reference. A
+  // convenience function to allow our user to store strings using the same
+  // StringSaver reference that we are using.
+  virtual const char *SaveString(const char *Str) override;
+  // Convenience function to cast a StringRef to char pointer and forward it
+  // to our string storage reference.
+  const char *SaveString(StringRef Str) {
+    return StringStorage->SaveString(Str.str().c_str());
+  }
+
+  /// \brief Expand response files on a command line recursively using the given
+  /// tokenization strategy. Argv should contain the command line before
+  /// expansion and will be copied.
+  ///
+  /// \param [in] Tokenizer Tokenization strategy. Typically Unix or Windows.
+  /// \param [in] Argv Command line
+  /// \param [in] SS Reference to an external string storage to save our new
+  /// string args (StringSaver reference).
+  /// \return instance of ExpandedArgs with hasError() set to false if all
+  /// @files were expanded successfully or there were none.
+  static ExpandedArgs ExpandResponseFiles(TokenizerType Tokenizer,
+                                          ArrayRef Argv,
+                                          StringSaver *SS);
+  /// \brief A variation of ExpandResponseFiles that accepts a SmallVector
+  /// rvalue that contains the initial command-line arguments. We move it to
+  /// our args vector and start working with response file expansion from it,
+  /// saving a vector object copy.
+  static ExpandedArgs ExpandResponseFiles(TokenizerType Tokenizer,
+                                          SmallVector &&Argv,
+                                          StringSaver *SS);
+
+  // Read-only direct access to the Args vector
+  const SmallVectorImpl &get() const { return Args; }
+  // Operations that do not change vector size are fine
+  iterator begin() { return Args.begin(); }
+  iterator end() { return Args.end(); }
+  void push_back(const char *Elt) { Args.push_back(Elt); }
+  size_type size() const { return Args.size(); }
+  const char *&operator[](unsigned i) { return Args[i]; }
+
+  iterator insert(iterator I, const char *Elt) {
+    size_t Idx = I - Args.begin();
+    for (auto &elem : ResponseBoundaries) {
+      if (Idx <= elem.first)
+        ++(elem.first);
+      if (Idx <= elem.second)
+        ++(elem.second);
+    }
+    return Args.insert(I, Elt);
+  }
+
+  template iterator insert(iterator I, ItTy From, ItTy To) {
+    size_t Idx = I - Args.begin();
+    size_t NumToInsert = std::distance(From, To);
+    for (auto &elem : ResponseBoundaries) {
+      if (Idx <= elem.first)
+        elem.first += NumToInsert;
+      if (Idx <= elem.second)
+        elem.second += NumToInsert;
+    }
+    return Args.insert(I, From, To);
+  }
+
+  // Erases the argument at I. Response file ranges that follow it shift down
+  // by one, a range that contains it shrinks, and a range whose only argument
+  // is the erased one is dropped.
+  iterator erase(iterator I) {
+    size_t Idx = I - Args.begin();
+    for (auto It = ResponseBoundaries.begin();
+         It != ResponseBoundaries.end();) {
+      // Bounds are inclusive, so first == second still denotes one argument;
+      // the range only becomes empty when that argument is the one erased.
+      if (Idx == It->first && Idx == It->second) {
+        It = ResponseBoundaries.erase(It);
+        continue;
+      }
+      if (Idx < It->first)
+        --It->first;
+      if (Idx <= It->second)
+        --It->second;
+      ++It;
+    }
+    return Args.erase(I);
+  }
+
+  /// \brief Checks if the argument of index Idx originally came from a response
+  /// file expansion. If positive, returns the index of the last argument that
+  /// came from the same response file.
+  ///
+  /// \param [in] Idx Index of the argument that you want to query.
+  /// \param [out] Boundary Index of the last argument that came from the same
+  /// response file.
+  /// \return true if the argument of index Idx originally came from a response
+  /// file.
+  bool lookupBoundaryByIndex(unsigned Idx, unsigned &Boundary) const {
+    for (const auto &elem : ResponseBoundaries) {
+      if (Idx < elem.first)
+        continue;
+      if (Idx <= elem.second) {
+        Boundary = elem.second;
+        return true;
+      }
+    }
+    return false;
+  }
+};
 
 } // End namespace cl
 
Index: lib/Option/OptTable.cpp
===================================================================
--- lib/Option/OptTable.cpp
+++ lib/Option/OptTable.cpp
@@ -11,6 +11,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include
@@ -192,9 +193,11 @@
   return 0;
 }
 
-Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
-                           unsigned FlagsToInclude,
-                           unsigned FlagsToExclude) const {
+Arg *
+OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
+                      unsigned FlagsToInclude,
+                      unsigned FlagsToExclude,
+                      const llvm::cl::ExpandedArgs *ResponseFilesInfo) const {
   unsigned Prev = Index;
   const char *Str = Args.getArgString(Index);
 
@@ -235,7 +238,17 @@
       continue;
 
     // See if this option matches.
-    if (Arg *A = Opt.accept(Args, Index, ArgSize))
+    unsigned LastInResponseFile = Args.getNumInputArgStrings() - 1;
+    if (ResponseFilesInfo) {
+      // Adjust our index (+1 in input and -1 in the answer) to compensate for
+      // the fact that our Args vector is shifted/missing the first argument
+      // (program name) in comparison with the one in ResponseFilesInfo.
+      if (ResponseFilesInfo->lookupBoundaryByIndex(Index + 1,
+                                                   LastInResponseFile))
+        --LastInResponseFile;
+    }
+
+    if (Arg *A = Opt.accept(Args, Index, ArgSize, LastInResponseFile))
       return A;
 
     // Otherwise, see if this argument was missing values.
@@ -251,12 +264,14 @@
   return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
 }
 
-InputArgList *OptTable::ParseArgs(const char *const *ArgBegin,
-                                  const char *const *ArgEnd,
-                                  unsigned &MissingArgIndex,
-                                  unsigned &MissingArgCount,
-                                  unsigned FlagsToInclude,
-                                  unsigned FlagsToExclude) const {
+InputArgList *
+OptTable::ParseArgs(const char *const *ArgBegin,
+                    const char *const *ArgEnd,
+                    unsigned &MissingArgIndex,
+                    unsigned &MissingArgCount,
+                    unsigned FlagsToInclude,
+                    unsigned FlagsToExclude,
+                    const llvm::cl::ExpandedArgs *ResponseFilesInfo) const {
   InputArgList *Args = new InputArgList(ArgBegin, ArgEnd);
 
   // FIXME: Handle '@' args (or at least error on them).
@@ -272,7 +287,8 @@
     }
 
     unsigned Prev = Index;
-    Arg *A = ParseOneArg(*Args, Index, FlagsToInclude, FlagsToExclude);
+    Arg *A = ParseOneArg(*Args, Index, FlagsToInclude, FlagsToExclude,
+                         ResponseFilesInfo);
     assert(Index > Prev && "Parser failed to consume argument.");
 
     // Check for missing argument error.
Index: lib/Option/Option.cpp
===================================================================
--- lib/Option/Option.cpp
+++ lib/Option/Option.cpp
@@ -102,7 +102,8 @@
 
 Arg *Option::accept(const ArgList &Args,
                     unsigned &Index,
-                    unsigned ArgSize) const {
+                    unsigned ArgSize,
+                    unsigned LastInResponseFile) const {
   const Option &UnaliasedOption = getUnaliasedOption();
   StringRef Spelling;
   // If the option was an alias, get the spelling from the unaliased one.
@@ -221,7 +222,7 @@
     if (ArgSize != strlen(Args.getArgString(Index)))
       return nullptr;
     Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
-    while (Index < Args.getNumInputArgStrings())
+    while (Index <= LastInResponseFile)
       A->getValues().push_back(Args.getArgString(Index++));
     return A;
   }
Index: lib/Support/CommandLine.cpp
===================================================================
--- lib/Support/CommandLine.cpp
+++ lib/Support/CommandLine.cpp
@@ -628,9 +628,8 @@
     NewArgv.push_back(Saver.SaveString(Token.c_str()));
 }
 
-static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
-                               TokenizerCallback Tokenizer,
-                               SmallVectorImpl &NewArgv) {
+bool ExpandedArgs::ExpandResponseFile(const char *FName, TokenizerType Tokenizer,
+                                      SmallVectorImpl &NewArgv) {
   ErrorOr> MemBufOrErr =
       MemoryBuffer::getFile(FName);
   if (!MemBufOrErr)
@@ -648,21 +647,63 @@
   }
 
   // Tokenize the contents into NewArgv.
-  Tokenizer(Str, Saver, NewArgv);
+  switch (Tokenizer) {
+  case GNU:
+    TokenizeGNUCommandLine(Str, *this, NewArgv);
+    break;
+  case Windows:
+    TokenizeWindowsCommandLine(Str, *this, NewArgv);
+    break;
+  }
 
   return true;
 }
 
+// String tokenization function type. Should be compatible with either
+// Windows or Unix command line tokenizers.
+typedef void(*TokenizerCallback)(StringRef Source,
+                                 SmallVectorImpl &NewArgv);
+
+// ExpandedArgs() constructors
+ExpandedArgs::ExpandedArgs(ArrayRef _Args, StringSaver *_SS)
+    : Args(_Args.begin(), _Args.end()), ResponseBoundaries(),
+      StringStorage(_SS), Error(false) {
+  assert(_SS != nullptr && "Invalid StringSaver reference");
+}
+
+ExpandedArgs::ExpandedArgs(SmallVector &&_Args,
+                           StringSaver *_SS)
+    : Args(std::move(_Args)), ResponseBoundaries(), StringStorage(_SS),
+      Error(false) {
+  assert(_SS != nullptr && "Invalid StringSaver reference");
+}
+
+ExpandedArgs::ExpandedArgs(const ExpandedArgs &&Other)
+    : Args(std::move(Other.Args)),
+      ResponseBoundaries(std::move(Other.ResponseBoundaries)),
+      StringStorage(Other.StringStorage), Error(Other.Error) {}
+
+ExpandedArgs &ExpandedArgs::operator=(ExpandedArgs &&RHS) {
+  Args = std::move(RHS.Args);
+  ResponseBoundaries = std::move(RHS.ResponseBoundaries);
+  StringStorage = RHS.StringStorage;
+  Error = RHS.Error;
+  return *this;
+}
+
+const char *ExpandedArgs::SaveString(const char *Str) {
+  return StringStorage->SaveString(Str);
+}
+
 /// \brief Expand response files on a command line recursively using the given
 /// StringSaver and tokenization strategy.
-bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
-                             SmallVectorImpl &Argv) {
+void cl::ExpandedArgs::ExpandResponseFiles(TokenizerType Tokenizer) {
   unsigned RspFiles = 0;
   bool AllExpanded = true;
 
-  // Don't cache Argv.size() because it can change.
-  for (unsigned I = 0; I != Argv.size(); ) {
-    const char *Arg = Argv[I];
+  // Don't cache size() because it can change.
+  for (unsigned I = 0; I != Args.size();) {
+    const char *Arg = Args[I];
     if (Arg[0] != '@') {
       ++I;
       continue;
@@ -670,25 +711,66 @@
 
     // If we have too many response files, leave some unexpanded. This avoids
     // crashing on self-referential response files.
-    if (RspFiles++ > 20)
-      return false;
+    if (RspFiles++ > 20) {
+      AllExpanded = false;
+      break;
+    }
 
     // Replace this response file argument with the tokenization of its
     // contents. Nested response files are expanded in subsequent iterations.
     // FIXME: If a nested response file uses a relative path, is it relative to
     // the cwd of the process or the response file?
-    SmallVector ExpandedArgv;
-    if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) {
+    SmallVector TempBuffer;
+    if (!ExpandResponseFile(Arg + 1, Tokenizer, TempBuffer)) {
       // We couldn't read this file, so we leave it in the argument stream and
       // move on.
       AllExpanded = false;
       ++I;
       continue;
     }
-    Argv.erase(Argv.begin() + I);
-    Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end());
-  }
-  return AllExpanded;
+    // The '@file' argument is replaced by the NumNew tokens read from it.
+    unsigned NumNew = TempBuffer.size();
+    Args.erase(Args.begin() + I);
+    Args.insert(Args.begin() + I, TempBuffer.begin(), TempBuffer.end());
+    // A nested response file sits inside ranges recorded earlier; resize
+    // those so they still end at their own last argument.
+    for (auto It = ResponseBoundaries.begin();
+         It != ResponseBoundaries.end();) {
+      if (It->second < I) {
+        ++It;
+        continue;
+      }
+      if (NumNew == 0 && It->first == It->second) {
+        // The enclosing file held nothing but this empty nested file.
+        It = ResponseBoundaries.erase(It);
+        continue;
+      }
+      It->second = It->second + NumNew - 1;
+      ++It;
+    }
+    // An empty response file contributes no arguments and gets no range;
+    // recording one would yield an end index that precedes its start.
+    if (NumNew != 0)
+      ResponseBoundaries.push_back(std::make_pair(I, I + NumNew - 1));
+  }
+  Error = !AllExpanded;
+}
+
+ExpandedArgs cl::ExpandedArgs::ExpandResponseFiles(TokenizerType Tokenizer,
+                                                   ArrayRef Argv,
+                                                   StringSaver *SS) {
+  ExpandedArgs EA(Argv, SS);
+  EA.ExpandResponseFiles(Tokenizer);
+  return EA;
+}
+
+ExpandedArgs
+cl::ExpandedArgs::ExpandResponseFiles(TokenizerType Tokenizer,
+                                      SmallVector &&Argv,
+                                      StringSaver *SS) {
+  ExpandedArgs EA(std::move(Argv), SS);
+  EA.ExpandResponseFiles(Tokenizer);
+  return EA;
 }
 
 namespace {
@@ -751,13 +833,11 @@
          "No options specified!");
 
   // Expand response files.
-  SmallVector newArgv;
-  for (int i = 0; i != argc; ++i)
-    newArgv.push_back(argv[i]);
   StrDupSaver Saver;
-  ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);
-  argv = &newArgv[0];
-  argc = static_cast(newArgv.size());
+  ExpandedArgs ExpArgs = ExpandedArgs::ExpandResponseFiles(
+      ExpandedArgs::GNU, ArrayRef(argv, argc), &Saver);
+  argv = &ExpArgs.get()[0];
+  argc = static_cast(ExpArgs.size());
 
   // Copy the program name into ProgName, making sure not to overflow it.
   StringRef ProgName = sys::path::filename(argv[0]);