diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -15,7 +15,7 @@ Goal and usage ============== -User of sanitizer tools, such as :doc:`AddressSanitizer`, :doc:`ThreadSanitizer` +Users of sanitizer tools, such as :doc:`AddressSanitizer`, :doc:`ThreadSanitizer` or :doc:`MemorySanitizer` may want to disable or alter some checks for certain source-level entities to: @@ -54,17 +54,16 @@ Ignorelists consist of entries, optionally grouped into sections. Empty lines and lines starting with "#" are ignored. -Section names are regular expressions written in square brackets that denote +Section names are globs written in square brackets that denote which sanitizer the following entries apply to. For example, ``[address]`` -specifies AddressSanitizer while ``[cfi-vcall|cfi-icall]`` specifies Control +specifies AddressSanitizer while ``[{cfi-vcall,cfi-icall}]`` specifies Control Flow Integrity virtual and indirect call checking. Entries without a section will be placed under the ``[*]`` section applying to all enabled sanitizers. -Entries contain an entity type, followed by a colon and a regular expression, +Entries contain an entity type, followed by a colon and a glob, specifying the names of the entities, optionally followed by an equals sign and -a tool-specific category, e.g. ``fun:*ExampleFunc=example_category``. The -meaning of ``*`` in regular expression for entity names is different - it is -treated as in shell wildcarding. Two generic entity types are ``src`` and +a tool-specific category, e.g. ``fun:*ExampleFunc=example_category``. +Two generic entity types are ``src`` and ``fun``, which allow users to specify source files and functions, respectively. Some sanitizer tools may introduce custom entity types and categories - refer to tool-specific docs. @@ -72,19 +71,19 @@ .. code-block:: bash # Lines starting with # are ignored. - # Turn off checks for the source file (use absolute path or path relative - # to the current working directory): - src:/path/to/source/file.c + # Turn off checks for the source file + # Entries without sections are placed into [*] and apply to all sanitizers + src:path/to/source/file.c + src:*/source/file.c # Turn off checks for this main file, including files included by it. # Useful when the main file instead of an included file should be ignored. mainfile:file.c # Turn off checks for a particular functions (use mangled names): - fun:MyFooBar fun:_Z8MyFooBarv - # Extended regular expressions are supported: - fun:bad_(foo|bar) + # Glob brace expansions and character ranges are supported + fun:bad_{foo,bar} src:bad_source[1-9].c - # Shell like usage of * is supported (* is treated as .*): + # "*" matches zero or more characters src:bad/sources/* fun:*BadFunction* # Specific sanitizer tools may introduce categories. @@ -92,10 +91,9 @@ # Sections can be used to limit ignorelist entries to specific sanitizers [address] fun:*BadASanFunc* - # Section names are regular expressions - [cfi-vcall|cfi-icall] + # Section names are globs + [{cfi-vcall,cfi-icall}] fun:*BadCfiCall - # Entries without sections are placed into [*] and apply to all sanitizers ``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but does not need plumbing into the build system. This works well for internal diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -36,8 +36,8 @@ bool isEmpty() const { return Sections.empty(); } bool hasPrefix(StringRef Prefix) const { - for (auto &SectionIter : Sections) - if (SectionIter.Entries.count(Prefix) > 0) + for (const auto &It : Sections) + if (It.second.Entries.count(Prefix) > 0) return true; return false; } diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -37,7 +37,8 @@ } void SanitizerSpecialCaseList::createSanitizerSections() { - for (auto &S : Sections) { + for (auto &It : Sections) { + auto &S = It.second; SanitizerMask Mask; #define SANITIZER(NAME, ID) \ diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -5,47 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception //===----------------------------------------------------------------------===// // -// This is a utility class used to parse user-provided text files with -// "special case lists" for code sanitizers. Such files are used to -// define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for -// sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer. -// -// Empty lines and lines starting with "#" are ignored. Sections are defined -// using a '[section_name]' header and can be used to specify sanitizers the -// entries below it apply to. Section names are regular expressions, and -// entries without a section header match all sections (e.g. an '[*]' header -// is assumed.) -// The remaining lines should have the form: -// prefix:wildcard_expression[=category] -// If category is not specified, it is assumed to be empty string. -// Definitions of "prefix" and "category" are sanitizer-specific. For example, -// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global". -// Wildcard expressions define, respectively, source files, main files, -// functions or globals which shouldn't be instrumented. -// Examples of categories: -// "functional": used in DFSan to list functions with pure functional -// semantics. -// "init": used in ASan exclusion list to disable initialization-order bugs -// detection for certain globals or source files. -// Full special case list file example: -// --- -// [address] -// # Excluded items: -// fun:*_ZN4base6subtle* -// global:*global_with_bad_access_or_initialization* -// global:*global_with_initialization_issues*=init -// type:*Namespace::ClassName*=init -// src:file_with_tricky_code.cc -// src:ignore-global-initializers-issues.cc=init -// mainfile:main_file.cc -// -// [dataflow] -// # Functions with pure functional semantics: -// fun:cos=functional -// fun:sin=functional -// --- -// Note that the wild card is in fact an llvm::Regex, but * is automatically -// replaced with .* +// This file implements a Special Case List for code sanitizers. // //===----------------------------------------------------------------------===// @@ -53,7 +13,8 @@ #define LLVM_SUPPORT_SPECIALCASELIST_H #include "llvm/ADT/StringMap.h" -#include "llvm/Support/Regex.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/GlobPattern.h" #include #include #include @@ -66,6 +27,45 @@ class FileSystem; } +/// This is a utility class used to parse user-provided text files with +/// "special case lists" for code sanitizers. Such files are used to +/// define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for +/// sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer. +/// +/// Empty lines and lines starting with "#" are ignored. Sections are defined +/// using a '[section_name]' header and can be used to specify sanitizers the +/// entries below it apply to. Section names are globs, and +/// entries without a section header match all sections (e.g. an '[*]' header +/// is assumed.) +/// The remaining lines should have the form: +/// prefix:glob_pattern[=category] +/// If category is not specified, it is assumed to be empty string. +/// Definitions of "prefix" and "category" are sanitizer-specific. For example, +/// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global". +/// "glob_pattern" defines source files, main files, functions or globals which +/// shouldn't be instrumented. +/// Examples of categories: +/// "functional": used in DFSan to list functions with pure functional +/// semantics. +/// "init": used in ASan exclusion list to disable initialization-order bugs +/// detection for certain globals or source files. +/// Full special case list file example: +/// --- +/// [address] +/// # Excluded items: +/// fun:*_ZN4base6subtle* +/// global:*global_with_bad_access_or_initialization* +/// global:*global_with_initialization_issues*=init +/// type:*Namespace::ClassName*=init +/// src:file_with_tricky_code.cc +/// src:ignore-global-initializers-issues.cc=init +/// mainfile:main_file.cc +/// +/// [dataflow] +/// # Functions with pure functional semantics: +/// fun:cos=functional +/// fun:sin=functional +/// --- class SpecialCaseList { public: /// Parses the special case list entries from files. On failure, returns @@ -88,7 +88,7 @@ /// \code /// @Prefix:=@Category /// \endcode - /// where @Query satisfies wildcard expression in a given @Section. + /// where @Query satisfies the glob in a given @Section. bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category = StringRef()) const; @@ -97,7 +97,7 @@ /// \code /// @Prefix:=@Category /// \endcode - /// where @Query satisfies wildcard expression in a given @Section. + /// where @Query satisfies the glob in a given @Section. /// Returns zero if there is no exclusion entry corresponding to this /// expression. unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, @@ -114,36 +114,34 @@ SpecialCaseList(SpecialCaseList const &) = delete; SpecialCaseList &operator=(SpecialCaseList const &) = delete; - /// Represents a set of regular expressions. Regular expressions which are - /// "literal" (i.e. no regex metacharacters) are stored in Strings. The - /// reason for doing so is efficiency; StringMap is much faster at matching - /// literal strings than Regex. + /// Represents a set of globs and their line numbers class Matcher { public: - bool insert(std::string Regexp, unsigned LineNumber, std::string &REError); + Error insert(StringRef Pattern, unsigned LineNumber); // Returns the line number in the source file that this query matches to. // Returns zero if no match is found. unsigned match(StringRef Query) const; private: - StringMap Strings; - std::vector, unsigned>> RegExes; + StringMap> Globs; }; using SectionEntries = StringMap>; struct Section { Section(std::unique_ptr M) : SectionMatcher(std::move(M)){}; + Section() : Section(std::make_unique()) {} std::unique_ptr SectionMatcher; SectionEntries Entries; }; - std::vector
Sections; + StringMap
Sections; + + Expected
addSection(StringRef SectionStr, unsigned LineNo); /// Parses just-constructed SpecialCaseList entries from a memory buffer. - bool parse(const MemoryBuffer *MB, StringMap &SectionsMap, - std::string &Error); + bool parse(const MemoryBuffer *MB, std::string &Error); // Helper method for derived classes to search by Prefix, Query, and Category // once they have already resolved a section entry. diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -15,57 +15,41 @@ #include "llvm/Support/SpecialCaseList.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" +#include #include #include #include -#include namespace llvm { -bool SpecialCaseList::Matcher::insert(std::string Regexp, - unsigned LineNumber, - std::string &REError) { - if (Regexp.empty()) { - REError = "Supplied regexp was blank"; - return false; - } - - if (Regex::isLiteralERE(Regexp)) { - Strings[Regexp] = LineNumber; - return true; - } - - // Replace * with .* - for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos; - pos += strlen(".*")) { - Regexp.replace(pos, strlen("*"), ".*"); +Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { + if (Pattern.empty()) + return createStringError(errc::invalid_argument, "Supplied glob was blank"); + + auto [It, DidEmplace] = Globs.try_emplace(Pattern); + if (DidEmplace) { + // We must be sure to use the string in the map rather than the provided + // reference which could be destroyed before match() is called + Pattern = It->getKey(); + auto &Pair = It->getValue(); + if (auto Err = GlobPattern::create(Pattern).moveInto(Pair.first)) + return Err; + Pair.second = LineNumber; } - - Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str(); - - // Check that the regexp is valid. - Regex CheckRE(Regexp); - if (!CheckRE.isValid(REError)) - return false; - - RegExes.emplace_back( - std::make_pair(std::make_unique(std::move(CheckRE)), LineNumber)); - return true; + return Error::success(); } unsigned SpecialCaseList::Matcher::match(StringRef Query) const { - auto It = Strings.find(Query); - if (It != Strings.end()) - return It->second; - for (const auto &RegExKV : RegExes) - if (RegExKV.first->match(Query)) - return RegExKV.second; + for (const auto &[Pattern, Pair] : Globs) + if (Pair.first.match(Query)) + return Pair.second; return 0; } +// TODO: Refactor this to return Expected<...> std::unique_ptr SpecialCaseList::create(const std::vector &Paths, llvm::vfs::FileSystem &FS, std::string &Error) { @@ -94,7 +78,6 @@ bool SpecialCaseList::createInternal(const std::vector &Paths, vfs::FileSystem &VFS, std::string &Error) { - StringMap Sections; for (const auto &Path : Paths) { ErrorOr> FileOrErr = VFS.getBufferForFile(Path); @@ -103,7 +86,7 @@ return false; } std::string ParseError; - if (!parse(FileOrErr.get().get(), Sections, ParseError)) { + if (!parse(FileOrErr.get().get(), ParseError)) { Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); return false; } @@ -113,82 +96,73 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error) { - StringMap Sections; - if (!parse(MB, Sections, Error)) + if (!parse(MB, Error)) return false; return true; } -bool SpecialCaseList::parse(const MemoryBuffer *MB, - StringMap &SectionsMap, - std::string &Error) { - // Iterate through each line in the exclusion list file. - SmallVector Lines; - MB->getBuffer().split(Lines, '\n'); +Expected +SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo) { + auto [It, DidEmplace] = Sections.try_emplace(SectionStr); + auto &Section = It->getValue(); + if (DidEmplace) + if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo)) + return createStringError(errc::invalid_argument, + "malformed section at line " + Twine(LineNo) + + ": '" + SectionStr + + "': " + toString(std::move(Err))); + return &Section; +} - unsigned LineNo = 1; - StringRef Section = "*"; +bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { + Section *CurrentSection; + if (auto Err = addSection("*", 1).moveInto(CurrentSection)) { + Error = toString(std::move(Err)); + return false; + } - for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) { - *I = I->trim(); - // Ignore empty lines and lines starting with "#" - if (I->empty() || I->startswith("#")) + // TODO: Consider using the first line as a config to select using regex or + // glob patterns like: + // "#!regex" or "#!glob" + + for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + !LineIt.is_at_eof(); LineIt++) { + unsigned LineNo = LineIt.line_number(); + StringRef Line = LineIt->trim(); + if (Line.empty()) continue; // Save section names - if (I->startswith("[")) { - if (!I->endswith("]")) { - Error = (Twine("malformed section header on line ") + Twine(LineNo) + - ": " + *I).str(); - return false; - } - - Section = I->slice(1, I->size() - 1); - - std::string REError; - Regex CheckRE(Section); - if (!CheckRE.isValid(REError)) { + if (Line.startswith("[")) { + if (!Line.endswith("]")) { Error = - (Twine("malformed regex for section ") + Section + ": '" + REError) + ("malformed section header on line " + Twine(LineNo) + ": " + Line) .str(); return false; } + if (auto Err = addSection(Line.drop_front().drop_back(), LineNo) + .moveInto(CurrentSection)) { + Error = toString(std::move(Err)); + return false; + } continue; } - // Get our prefix and unparsed regexp. - std::pair SplitLine = I->split(":"); - StringRef Prefix = SplitLine.first; - if (SplitLine.second.empty()) { + // Get our prefix and unparsed glob. + auto [Prefix, Postfix] = Line.split(":"); + if (Postfix.empty()) { // Missing ':' in the line. - Error = (Twine("malformed line ") + Twine(LineNo) + ": '" + - SplitLine.first + "'").str(); + Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str(); return false; } - std::pair SplitRegexp = SplitLine.second.split("="); - std::string Regexp = std::string(SplitRegexp.first); - StringRef Category = SplitRegexp.second; - - // Create this section if it has not been seen before. - if (!SectionsMap.contains(Section)) { - std::unique_ptr M = std::make_unique(); - std::string REError; - if (!M->insert(std::string(Section), LineNo, REError)) { - Error = (Twine("malformed section ") + Section + ": '" + REError).str(); - return false; - } - - SectionsMap[Section] = Sections.size(); - Sections.emplace_back(std::move(M)); - } - - auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category]; - std::string REError; - if (!Entry.insert(std::move(Regexp), LineNo, REError)) { - Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" + - SplitLine.second + "': " + REError).str(); + auto [Pattern, Category] = Postfix.split("="); + auto &Entry = CurrentSection->Entries[Prefix][Category]; + if (auto Err = Entry.insert(Pattern, LineNo)) { + Error = ("malformed glob in line " + Twine(LineNo) + ": '" + Pattern + + "': " + toString(std::move(Err))) + .str(); return false; } } @@ -205,13 +179,14 @@ unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { - for (const auto &SectionIter : Sections) - if (SectionIter.SectionMatcher->match(Section)) { - unsigned Blame = - inSectionBlame(SectionIter.Entries, Prefix, Query, Category); + for (const auto &It : Sections) { + const auto &S = It.getValue(); + if (S.SectionMatcher->match(Section)) { + unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category); if (Blame) return Blame; } + } return 0; } @@ -226,4 +201,4 @@ return II->getValue().match(Query); } -} // namespace llvm +} // namespace llvm diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -10,8 +10,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/VirtualFileSystem.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" +using testing::HasSubstr; +using testing::StartsWith; using namespace llvm; namespace { @@ -50,7 +53,8 @@ "src:hello\n" "src:bye\n" "src:hi=category\n" - "src:z*=category\n"); + "src:z*=category\n" + " \n"); EXPECT_TRUE(SCL->inSection("", "src", "hello")); EXPECT_TRUE(SCL->inSection("", "src", "bye")); EXPECT_TRUE(SCL->inSection("", "src", "hi", "category")); @@ -74,31 +78,29 @@ "\n" "[not valid\n", Error)); - EXPECT_TRUE( - ((StringRef)Error).startswith("malformed section header on line 3:")); + EXPECT_THAT(Error, StartsWith("malformed section header on line 3:")); EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n" "[not valid\n", Error)); - EXPECT_TRUE( - ((StringRef)Error).startswith("malformed section header on line 4:")); + EXPECT_THAT(Error, StartsWith("malformed section header on line 4:")); } -TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) { +TEST_F(SpecialCaseListTest, SectionGlobErrorHandling) { std::string Error; EXPECT_EQ(makeSpecialCaseList("[address", Error), nullptr); - EXPECT_TRUE(((StringRef)Error).startswith("malformed section header ")); + EXPECT_THAT(Error, StartsWith("malformed section header ")); EXPECT_EQ(makeSpecialCaseList("[[]", Error), nullptr); - EXPECT_TRUE(((StringRef)Error).startswith("malformed regex for section [: ")); + EXPECT_EQ(Error, "malformed section at line 1: '[': invalid glob pattern: ["); EXPECT_EQ(makeSpecialCaseList("src:=", Error), nullptr); - EXPECT_TRUE(((StringRef)Error).endswith("Supplied regexp was blank")); + EXPECT_THAT(Error, HasSubstr("Supplied glob was blank")); } TEST_F(SpecialCaseListTest, Section) { std::unique_ptr SCL = makeSpecialCaseList("src:global\n" - "[sect1|sect2]\n" + "[{sect1,sect2}]\n" "src:test1\n" "[sect3*]\n" "src:test2\n"); @@ -154,17 +156,12 @@ EXPECT_EQ(nullptr, makeSpecialCaseList("badline", Error)); EXPECT_EQ("malformed line 1: 'badline'", Error); EXPECT_EQ(nullptr, makeSpecialCaseList("src:bad[a-", Error)); - EXPECT_EQ("malformed regex in line 1: 'bad[a-': invalid character range", - Error); - EXPECT_EQ(nullptr, makeSpecialCaseList("src:a.c\n" - "fun:fun(a\n", - Error)); - EXPECT_EQ("malformed regex in line 2: 'fun(a': parentheses not balanced", + EXPECT_EQ("malformed glob in line 1: 'bad[a-': invalid glob pattern: bad[a-", Error); std::vector Files(1, "unexisting"); EXPECT_EQ(nullptr, SpecialCaseList::create(Files, *vfs::getRealFileSystem(), Error)); - EXPECT_EQ(0U, Error.find("can't open file 'unexisting':")); + EXPECT_THAT(Error, StartsWith("can't open file 'unexisting':")); } TEST_F(SpecialCaseListTest, EmptySpecialCaseList) { @@ -191,7 +188,7 @@ } TEST_F(SpecialCaseListTest, NoTrigramsInRules) { - std::unique_ptr SCL = makeSpecialCaseList("fun:b.r\n" + std::unique_ptr SCL = makeSpecialCaseList("fun:b?r\n" "fun:za*az\n"); EXPECT_TRUE(SCL->inSection("", "fun", "bar")); EXPECT_FALSE(SCL->inSection("", "fun", "baz"));