diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h
--- a/clang-tools-extra/clangd/CompileCommands.h
+++ b/clang-tools-extra/clangd/CompileCommands.h
@@ -50,6 +50,56 @@
   Memoize<llvm::StringMap<std::string>> ResolvedDriversNoFollow;
 };
 
+// Removes args from a command-line in a semantically-aware way.
+// For instance, when "-I" is stripped:
+//  - so is its argument (either as -Ifoo or -I foo)
+//  - aliases like --include-directory=foo are also removed
+//  - CL-style /Ifoo will be removed if the args indicate MS-compatible mode
+//  - the -Xclang prefix will be dropped if present
+// Args that are not recognized as flags are still removed as literal strings,
+// and strip("ABC*") will remove any arg with an ABC prefix.
+//
+// Internally this builds a large (0.5MB) table of clang options on first use.
+// Both strip() and process() are fairly cheap after that.
+//
+// FIXME: this reimplements much of OptTable, it might be nice to expose more.
+// The table-building strategy may not make sense outside clangd.
+class ArgStripper {
+public:
+  // Adds the arg to the set which should be removed.
+  // Recognized clang flags are stripped semantically. When "-I" is stripped:
+  //  - so is its argument (either as -Ifoo or -I foo)
+  //  - aliases like --include-directory=foo are also stripped
+  //  - CL-style /Ifoo will be removed if the args indicate MS-compatible mode
+  //  - the -Xclang prefix will be dropped if present
+  // Args that are not recognized as flags are removed literally, except:
+  //  - a leading -Xclang prefix is still removed
+  //  - strip("ABC*") will remove any arg with an ABC prefix.
+  void strip(llvm::StringRef Arg);
+  // Remove the targets from a compile command, in-place.
+  // Args[0] is taken to be the program name and is never removed.
+  void process(std::vector<std::string> &Args) const;
+
+private:
+  // Deletion rules, to be checked for each arg.
+  struct Rule {
+    llvm::StringRef Text;    // Rule applies only if arg begins with Text.
+    unsigned char Modes = 0; // Rule applies only in specified driver modes.
+    uint16_t ExactArgs = 0;  // Num args consumed when Arg == Text.
+    uint16_t PrefixArgs = 0; // Num args consumed when Arg starts with Text.
+  };
+  // Rules for every spelling of the option named by Arg; empty if unknown.
+  static llvm::ArrayRef<Rule> rulesFor(llvm::StringRef Arg);
+  // The most specific rule that applies to Arg in the given mode, or null.
+  // On a match, ArgCount is the number of args (including Arg) to delete.
+  const Rule *matchingRule(llvm::StringRef Arg, unsigned Mode,
+                           unsigned &ArgCount) const;
+  llvm::SmallVector<Rule, 4> Rules;
+  // Strings not found in the option table. Each is allocated separately so
+  // the StringRefs held by Rules stay valid when more strings are added.
+  std::vector<std::unique_ptr<std::string>> Storage;
+};
+
 } // namespace clangd
 } // namespace clang
 
diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp
--- a/clang-tools-extra/clangd/CompileCommands.cpp
+++ b/clang-tools-extra/clangd/CompileCommands.cpp
@@ -9,8 +9,13 @@
 #include "CompileCommands.h"
 #include "Config.h"
 #include "support/Logger.h"
+#include "clang/Driver/Options.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -235,5 +240,268 @@
   };
 }
 
+// ArgStripper implementation
+namespace {
+
+// Determine total number of args consumed by this option.
+// Return answers for {Exact, Prefix} match. 0 means not allowed.
+std::pair<unsigned, unsigned> getArgCount(const llvm::opt::Option &Opt) {
+  constexpr static unsigned Rest = 10000; // Should be all the rest!
+  // Reference is llvm::opt::Option::acceptInternal()
+  using llvm::opt::Option;
+  switch (Opt.getKind()) {
+  case Option::FlagClass:
+    return {1, 0};
+  case Option::JoinedClass:
+  case Option::CommaJoinedClass:
+    return {1, 1};
+  case Option::GroupClass:
+  case Option::InputClass:
+  case Option::UnknownClass:
+  case Option::ValuesClass:
+    return {1, 0};
+  case Option::JoinedAndSeparateClass:
+    return {2, 2};
+  case Option::SeparateClass:
+    return {2, 0};
+  case Option::MultiArgClass:
+    return {1 + Opt.getNumArgs(), 0};
+  case Option::JoinedOrSeparateClass:
+    return {2, 1};
+  case Option::RemainingArgsClass:
+    return {Rest, 0};
+  case Option::RemainingArgsJoinedClass:
+    return {Rest, Rest};
+  }
+  llvm_unreachable("Unhandled option kind");
+}
+
+// Flag-parsing mode, which affects which flags are available.
+enum DriverMode : unsigned char {
+  DM_None = 0,
+  DM_GCC = 1, // Default mode e.g. when invoked as 'clang'
+  DM_CL = 2,  // MS CL.exe compatible mode e.g. when invoked as 'clang-cl'
+  DM_CC1 = 4, // When invoked as 'clang -cc1' or after '-Xclang'
+  DM_All = 7
+};
+
+// Examine args list to determine if we're in GCC, CL-compatible, or cc1 mode.
+DriverMode getDriverMode(const std::vector<std::string> &Args) {
+  DriverMode Mode = DM_GCC;
+  llvm::StringRef Argv0 = Args.front();
+  if (Argv0.endswith_lower(".exe"))
+    Argv0 = Argv0.drop_back(strlen(".exe"));
+  if (Argv0.endswith_lower("cl"))
+    Mode = DM_CL;
+  for (const llvm::StringRef Arg : Args) {
+    if (Arg == "--driver-mode=cl") {
+      Mode = DM_CL;
+      break;
+    }
+    if (Arg == "-cc1") {
+      Mode = DM_CC1;
+      break;
+    }
+  }
+  return Mode;
+}
+
+// Returns the set of DriverModes where an option may be used.
+unsigned char getModes(const llvm::opt::Option &Opt) {
+  // Why is this so complicated?!
+  // Reference is clang::driver::Driver::getIncludeExcludeOptionFlagMasks()
+  unsigned char Result = DM_None;
+  if (Opt.hasFlag(driver::options::CC1Option))
+    Result |= DM_CC1;
+  if (!Opt.hasFlag(driver::options::NoDriverOption)) {
+    if (Opt.hasFlag(driver::options::CLOption)) {
+      Result |= DM_CL;
+    } else {
+      Result |= DM_GCC;
+      if (Opt.hasFlag(driver::options::CoreOption)) {
+        Result |= DM_CL;
+      }
+    }
+  }
+  return Result;
+}
+
+} // namespace
+
+llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
+  // All the hard work is done once in a static initializer.
+  // We compute a table containing strings to look for and #args to skip.
+  // e.g. "-x" => {-x 2 args, -x* 1 arg, --language 2 args, --language=* 1 arg}
+  using TableTy =
+      llvm::StringMap<llvm::SmallVector<Rule, 4>, llvm::BumpPtrAllocator>;
+  static TableTy *Table = [] {
+    auto &DriverTable = driver::getDriverOptTable();
+    using DriverID = clang::driver::options::ID;
+
+    // Collect sets of aliases, so we can treat -foo and -foo= as synonyms.
+    // Conceptually a double-linked list: PrevAlias[I] -> I -> NextAlias[I].
+    // If PrevAlias[I] is INVALID, then I is canonical.
+    DriverID PrevAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    DriverID NextAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    auto AddAlias = [&](DriverID Self, DriverID T) {
+      if (NextAlias[T]) {
+        PrevAlias[NextAlias[T]] = Self;
+        NextAlias[Self] = NextAlias[T];
+      }
+      PrevAlias[Self] = T;
+      NextAlias[T] = Self;
+    };
+    // Also grab prefixes for each option, these are not fully exposed.
+    const char *const *Prefixes[DriverID::LastOption] = {nullptr};
+#define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELP, METAVAR, VALUES)                                          \
+  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
+    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
+  Prefixes[DriverID::OPT_##ID] = PREFIX;
+#include "clang/Driver/Options.inc"
+#undef OPTION
+#undef PREFIX
+
+    auto Result = std::make_unique<TableTy>();
+    // Iterate over distinct options (represented by the canonical alias).
+    // Every spelling of this option will get the same set of rules.
+    for (unsigned ID = 1 /*Skip INVALID */; ID < DriverID::LastOption; ++ID) {
+      if (PrevAlias[ID] || ID == DriverID::OPT_Xclang)
+        continue; // Not canonical, or specially handled.
+      llvm::SmallVector<Rule, 8> Rules;
+      // Iterate over each alias, to add rules for parsing it.
+      for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) {
+        if (Prefixes[A] == nullptr) // option groups.
+          continue;
+        auto Opt = DriverTable.getOption(A);
+        // Exclude - and -foo pseudo-options.
+        if (Opt.getName().empty())
+          continue;
+        auto Modes = getModes(Opt);
+        std::pair<unsigned, unsigned> ArgCount = getArgCount(Opt);
+        // Iterate over each spelling of the alias, e.g. -foo vs --foo.
+        for (auto *Prefix = Prefixes[A]; *Prefix != nullptr; ++Prefix) {
+          llvm::SmallString<64> Buf(*Prefix);
+          Buf.append(Opt.getName());
+          llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey();
+          Rules.emplace_back();
+          Rules.back().Text = Spelling;
+          Rules.back().Modes = Modes;
+          Rules.back().ExactArgs = ArgCount.first;
+          Rules.back().PrefixArgs = ArgCount.second;
+        }
+      }
+      // Register the set of rules under each possible name.
+      for (const auto &R : Rules)
+        Result->find(R.Text)->second.append(Rules.begin(), Rules.end());
+    }
+#ifndef NDEBUG
+    // Dump the table and various measures of its size.
+    unsigned RuleCount = 0;
+    dlog("ArgStripper Option spelling table");
+    for (const auto &Entry : *Result) {
+      dlog("{0}", Entry.first());
+      RuleCount += Entry.second.size();
+      for (const auto &R : Entry.second)
+        dlog("  {0} #={1} *={2} Mode={3}", R.Text, R.ExactArgs, R.PrefixArgs,
+             int(R.Modes));
+    }
+    dlog("Table spellings={0} rules={1} string-bytes={2}", Result->size(),
+         RuleCount, Result->getAllocator().getBytesAllocated());
+#endif
+    // The static table will never be destroyed.
+    return Result.release();
+  }();
+
+  auto It = Table->find(Arg);
+  return (It == Table->end()) ? llvm::ArrayRef<Rule>() : It->second;
+}
+
+void ArgStripper::strip(llvm::StringRef Arg) {
+  auto OptionRules = rulesFor(Arg);
+  if (OptionRules.empty()) {
+    // Not a recognized flag. Strip it literally.
+    // The rule points into the owned copy, whose address must never change.
+    Storage.push_back(std::make_unique<std::string>(Arg));
+    Rules.emplace_back();
+    Rules.back().Text = *Storage.back();
+    Rules.back().ExactArgs = 1;
+    if (Rules.back().Text.consume_back("*"))
+      Rules.back().PrefixArgs = 1;
+    Rules.back().Modes = DM_All;
+  } else {
+    Rules.append(OptionRules.begin(), OptionRules.end());
+  }
+}
+
+const ArgStripper::Rule *ArgStripper::matchingRule(llvm::StringRef Arg,
+                                                   unsigned Mode,
+                                                   unsigned &ArgCount) const {
+  // Several rules may match, e.g. both -include and -include-pch are prefixes
+  // of "-include-pch". Prefer the longest (most specific) spelling so the
+  // result doesn't depend on the order of the strip() calls.
+  const Rule *Best = nullptr;
+  for (const Rule &R : Rules) {
+    // Rule can fail to match if...
+    if (!(R.Modes & Mode))
+      continue; // not applicable to current driver mode
+    if (!Arg.startswith(R.Text))
+      continue; // current arg doesn't match the prefix string
+    bool PrefixMatch = Arg.size() > R.Text.size();
+    unsigned Count = PrefixMatch ? R.PrefixArgs : R.ExactArgs;
+    if (Count == 0)
+      continue; // rule can't apply as an exact/prefix match
+    if (Best && Best->Text.size() >= R.Text.size())
+      continue; // an earlier rule is at least as specific
+    Best = &R;
+    ArgCount = Count;
+  }
+  return Best;
+}
+
+void ArgStripper::process(std::vector<std::string> &Args) const {
+  if (Args.empty())
+    return;
+
+  // We're parsing the args list in some mode (e.g. gcc-compatible) but may
+  // temporarily switch to another mode with the -Xclang flag.
+  DriverMode MainMode = getDriverMode(Args);
+  DriverMode CurrentMode = MainMode;
+
+  // Read and write heads for in-place deletion.
+  // Start after argv0: it is the program name, not a flag. In CL mode a path
+  // like /Interesting/clang-cl would otherwise match the /I rule.
+  unsigned Read = 1, Write = 1;
+  bool WasXclang = false;
+  while (Read < Args.size()) {
+    unsigned ArgCount = 0;
+    if (matchingRule(Args[Read], CurrentMode, ArgCount)) {
+      // Delete it and its args.
+      if (WasXclang) {
+        assert(Write > 1); // -Xclang was copied after argv0.
+        --Write; // Drop previous -Xclang arg
+        CurrentMode = MainMode;
+        WasXclang = false;
+      }
+      // Advance to last arg. An arg may be foo or -Xclang foo.
+      for (unsigned I = 1; Read < Args.size() && I < ArgCount; ++I) {
+        ++Read;
+        if (Read < Args.size() && Args[Read] == "-Xclang")
+          ++Read;
+      }
+    } else {
+      // No match, just copy the arg through.
+      WasXclang = Args[Read] == "-Xclang";
+      CurrentMode = WasXclang ? DM_CC1 : MainMode;
+      if (Write != Read)
+        Args[Write] = std::move(Args[Read]);
+      ++Write;
+    }
+    ++Read;
+  }
+  Args.resize(Write);
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
--- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
@@ -207,6 +207,136 @@
   EXPECT_THAT(Cmd, ElementsAre(_, "FOO.CC", "--hello", "-fsyntax-only"));
 }
 
+static std::string strip(llvm::StringRef Arg, llvm::StringRef Argv) {
+  llvm::SmallVector<llvm::StringRef, 8> Parts;
+  llvm::SplitString(Argv, Parts);
+  std::vector<std::string> Args = {Parts.begin(), Parts.end()};
+  ArgStripper S;
+  S.strip(Arg);
+  S.process(Args);
+  return llvm::join(Args, " ");
+}
+
+TEST(ArgStripperTest, Spellings) {
+  // May use alternate prefixes.
+  EXPECT_EQ(strip("-pedantic", "clang -pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-pedantic", "clang --pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--pedantic", "clang -pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--pedantic", "clang --pedantic foo.cc"), "clang foo.cc");
+  // May use alternate names.
+  EXPECT_EQ(strip("-x", "clang -x c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-x", "clang --language=c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--language=", "clang -x c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--language=", "clang --language=c++ foo.cc"),
+            "clang foo.cc");
+}
+
+TEST(ArgStripperTest, UnknownFlag) {
+  EXPECT_EQ(strip("-xyzzy", "clang -xyzzy foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-xyz*", "clang -xyzzy foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-xyzzy", "clang -Xclang -xyzzy foo.cc"), "clang foo.cc");
+}
+
+TEST(ArgStripperTest, Xclang) {
+  // Flags may be -Xclang escaped.
+  EXPECT_EQ(strip("-ast-dump", "clang -Xclang -ast-dump foo.cc"),
+            "clang foo.cc");
+  // Args may be -Xclang escaped.
+  EXPECT_EQ(strip("-add-plugin", "clang -Xclang -add-plugin -Xclang z foo.cc"),
+            "clang foo.cc");
+}
+
+TEST(ArgStripperTest, ClangCL) {
+  // /I is a synonym for -I in clang-cl mode only.
+  // Not stripped by default.
+  EXPECT_EQ(strip("-I", "clang -I /usr/inc /Interesting/file.cc"),
+            "clang /Interesting/file.cc");
+  // Stripped when invoked as clang-cl.
+  EXPECT_EQ(strip("-I", "clang-cl -I /usr/inc /Interesting/file.cc"),
+            "clang-cl");
+  // Stripped when invoked as CL.EXE
+  EXPECT_EQ(strip("-I", "CL.EXE -I /usr/inc /Interesting/file.cc"), "CL.EXE");
+  // Stripped when passed --driver-mode=cl.
+  EXPECT_EQ(strip("-I", "cc -I /usr/inc /Interesting/file.cc --driver-mode=cl"),
+            "cc --driver-mode=cl");
+}
+
+TEST(ArgStripperTest, ArgStyles) {
+  // Flag
+  EXPECT_EQ(strip("-Qn", "clang -Qn foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-Qn", "clang -QnZ foo.cc"), "clang -QnZ foo.cc");
+  // Joined
+  EXPECT_EQ(strip("-std=", "clang -std= foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-std=", "clang -std=c++11 foo.cc"), "clang foo.cc");
+  // Separate
+  EXPECT_EQ(strip("-mllvm", "clang -mllvm X foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-mllvm", "clang -mllvmX foo.cc"), "clang -mllvmX foo.cc");
+  // RemainingArgsJoined
+  EXPECT_EQ(strip("/link", "clang-cl /link b c d foo.cc"), "clang-cl");
+  EXPECT_EQ(strip("/link", "clang-cl /linka b c d foo.cc"), "clang-cl");
+  // CommaJoined
+  EXPECT_EQ(strip("-Wl,", "clang -Wl,x,y foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-Wl,", "clang -Wl, foo.cc"), "clang foo.cc");
+  // MultiArg
+  EXPECT_EQ(strip("-segaddr", "clang -segaddr a b foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-segaddr", "clang -segaddra b foo.cc"),
+            "clang -segaddra b foo.cc");
+  // JoinedOrSeparate
+  EXPECT_EQ(strip("-G", "clang -GX foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-G", "clang -G X foo.cc"), "clang foo.cc");
+  // JoinedAndSeparate
+  EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg-X Y foo.cc"),
+            "clang -cc1 foo.cc");
+  EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg- Y foo.cc"),
+            "clang -cc1 foo.cc");
+}
+
+TEST(ArgStripperTest, EndOfList) {
+  // When we hit the end-of-args prematurely, we don't crash.
+  // We consume the incomplete args if we've matched the target option.
+  EXPECT_EQ(strip("-I", "clang -Xclang"), "clang -Xclang");
+  EXPECT_EQ(strip("-I", "clang -Xclang -I"), "clang");
+  EXPECT_EQ(strip("-I", "clang -I -Xclang"), "clang");
+  EXPECT_EQ(strip("-I", "clang -I"), "clang");
+}
+
+TEST(ArgStripperTest, Multiple) {
+  ArgStripper S;
+  S.strip("-o");
+  S.strip("-c");
+  std::vector<std::string> Args = {"clang", "-o", "foo.o", "foo.cc", "-c"};
+  S.process(Args);
+  EXPECT_THAT(Args, ElementsAre("clang", "foo.cc"));
+}
+
+TEST(ArgStripperTest, OrderDependent) {
+  ArgStripper S;
+  // If the -include rule were applied, we'd see -pch as its joined arg and
+  // foo.pch would remain. The more specific -include-pch rule must win.
+  S.strip("-include");
+  S.strip("-include-pch");
+  std::vector<std::string> Args = {"clang", "-include-pch", "foo.pch",
+                                   "foo.cc"};
+  S.process(Args);
+  EXPECT_THAT(Args, ElementsAre("clang", "foo.cc"));
+}
+
+TEST(ArgStripperTest, ManyLiterals) {
+  // Earlier literal rules must stay valid as more strings are stored.
+  ArgStripper S;
+  for (const char *Literal : {"-xyzzy1", "-xyzzy2", "-xyzzy3", "-xyzzy4"})
+    S.strip(Literal);
+  std::vector<std::string> Args = {"clang", "-xyzzy1", "foo.cc", "-xyzzy4"};
+  S.process(Args);
+  EXPECT_THAT(Args, ElementsAre("clang", "foo.cc"));
+}
+
+TEST(ArgStripperTest, Argv0) {
+  // The program name is never stripped, even if it looks like a CL-style /I.
+  EXPECT_EQ(strip("-I", "/Interesting/clang-cl /Ifoo bar.cc"),
+            "/Interesting/clang-cl bar.cc");
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang