Skip to content

Commit 40d6663

Browse files
committedNov 7, 2017
Extend SpecialCaseList to allow users to blame matches on entries in the file.
Summary: Extends SCL functionality to allow users to find the line number in the file the SCL is built from through SpecialCaseList::inSectionBlame(...). Also removes the need to compile the SCL before use. As the matcher now contains a list of regexes to test against instead of a single regex, the regexes can be individually built on each insertion rather than one large compilation at the end of construction. This change also fixes a bug where blank lines would cause the parser to become out-of-sync with the line number. An error on line `k` was being reported as being on line `k - num_blank_lines_before_k`. Note: This change has a cyclical dependency on D39486. Both these changes must be submitted at the same time to avoid a build breakage. Reviewers: vlad.tsyrklevich Reviewed By: vlad.tsyrklevich Subscribers: kcc, pcc, llvm-commits Differential Revision: https://reviews.llvm.org/D39485 llvm-svn: 317617
1 parent 512fa40 commit 40d6663

File tree

3 files changed

+83
-61
lines changed

3 files changed

+83
-61
lines changed
 

‎llvm/include/llvm/Support/SpecialCaseList.h

+22-15
Original file line numberDiff line numberDiff line change
@@ -89,32 +89,43 @@ class SpecialCaseList {
8989
bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
9090
StringRef Category = StringRef()) const;
9191

92+
/// Returns the line number corresponding to the special case list entry if
93+
/// the special case list contains a line
94+
/// \code
95+
/// @Prefix:<E>=@Category
96+
/// \endcode
97+
/// where @Query satisfies wildcard expression <E> in a given @Section.
98+
/// Returns zero if there is no blacklist entry corresponding to this
99+
/// expression.
100+
unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
101+
StringRef Category = StringRef()) const;
102+
92103
protected:
93104
// Implementations of the create*() functions that can also be used by derived
94105
// classes.
95106
bool createInternal(const std::vector<std::string> &Paths,
96107
std::string &Error);
97108
bool createInternal(const MemoryBuffer *MB, std::string &Error);
98109

110+
SpecialCaseList() = default;
99111
SpecialCaseList(SpecialCaseList const &) = delete;
100112
SpecialCaseList &operator=(SpecialCaseList const &) = delete;
101113

102114
/// Represents a set of regular expressions. Regular expressions which are
103-
/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
104-
/// others are represented as a single pipe-separated regex in RegEx. The
105-
/// reason for doing so is efficiency; StringSet is much faster at matching
115+
/// "literal" (i.e. no regex metacharacters) are stored in Strings. The
116+
/// reason for doing so is efficiency; StringMap is much faster at matching
106117
/// literal strings than Regex.
107118
class Matcher {
108119
public:
109-
bool insert(std::string Regexp, std::string &REError);
110-
void compile();
111-
bool match(StringRef Query) const;
120+
bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
121+
// Returns the line number in the source file that this query matches to.
122+
// Returns zero if no match is found.
123+
unsigned match(StringRef Query) const;
112124

113125
private:
114-
StringSet<> Strings;
126+
StringMap<unsigned> Strings;
115127
TrigramIndex Trigrams;
116-
std::unique_ptr<Regex> RegEx;
117-
std::string UncompiledRegEx;
128+
std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
118129
};
119130

120131
using SectionEntries = StringMap<StringMap<Matcher>>;
@@ -127,19 +138,15 @@ class SpecialCaseList {
127138
};
128139

129140
std::vector<Section> Sections;
130-
bool IsCompiled;
131141

132-
SpecialCaseList();
133142
/// Parses just-constructed SpecialCaseList entries from a memory buffer.
134143
bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
135144
std::string &Error);
136-
/// compile() should be called once, after parsing all the memory buffers.
137-
void compile();
138145

139146
// Helper method for derived classes to search by Prefix, Query, and Category
140147
// once they have already resolved a section entry.
141-
bool inSection(const SectionEntries &Entries, StringRef Prefix,
142-
StringRef Query, StringRef Category) const;
148+
unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
149+
StringRef Query, StringRef Category) const;
143150
};
144151

145152
} // namespace llvm

‎llvm/lib/Support/SpecialCaseList.cpp

+37-46
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,15 @@
2727
namespace llvm {
2828

2929
bool SpecialCaseList::Matcher::insert(std::string Regexp,
30+
unsigned LineNumber,
3031
std::string &REError) {
3132
if (Regexp.empty()) {
3233
REError = "Supplied regexp was blank";
3334
return false;
3435
}
3536

3637
if (Regex::isLiteralERE(Regexp)) {
37-
Strings.insert(Regexp);
38+
Strings[Regexp] = LineNumber;
3839
return true;
3940
}
4041
Trigrams.insert(Regexp);
@@ -45,34 +46,30 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp,
4546
Regexp.replace(pos, strlen("*"), ".*");
4647
}
4748

49+
Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
50+
4851
// Check that the regexp is valid.
4952
Regex CheckRE(Regexp);
5053
if (!CheckRE.isValid(REError))
5154
return false;
5255

53-
if (!UncompiledRegEx.empty())
54-
UncompiledRegEx += "|";
55-
UncompiledRegEx += "^(" + Regexp + ")$";
56+
RegExes.emplace_back(
57+
std::make_pair(make_unique<Regex>(std::move(CheckRE)), LineNumber));
5658
return true;
5759
}
5860

59-
void SpecialCaseList::Matcher::compile() {
60-
if (!UncompiledRegEx.empty()) {
61-
RegEx.reset(new Regex(UncompiledRegEx));
62-
UncompiledRegEx.clear();
63-
}
64-
}
65-
66-
bool SpecialCaseList::Matcher::match(StringRef Query) const {
67-
if (Strings.count(Query))
68-
return true;
61+
unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
62+
auto It = Strings.find(Query);
63+
if (It != Strings.end())
64+
return It->second;
6965
if (Trigrams.isDefinitelyOut(Query))
7066
return false;
71-
return RegEx && RegEx->match(Query);
67+
for (auto& RegExKV : RegExes)
68+
if (RegExKV.first->match(Query))
69+
return RegExKV.second;
70+
return 0;
7271
}
7372

74-
SpecialCaseList::SpecialCaseList() : Sections(), IsCompiled(false) {}
75-
7673
std::unique_ptr<SpecialCaseList>
7774
SpecialCaseList::create(const std::vector<std::string> &Paths,
7875
std::string &Error) {
@@ -114,7 +111,6 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
114111
return false;
115112
}
116113
}
117-
compile();
118114
return true;
119115
}
120116

@@ -123,7 +119,6 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
123119
StringMap<size_t> Sections;
124120
if (!parse(MB, Sections, Error))
125121
return false;
126-
compile();
127122
return true;
128123
}
129124

@@ -132,11 +127,13 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
132127
std::string &Error) {
133128
// Iterate through each line in the blacklist file.
134129
SmallVector<StringRef, 16> Lines;
135-
SplitString(MB->getBuffer(), Lines, "\n\r");
130+
MB->getBuffer().split(Lines, '\n');
136131

137-
int LineNo = 1;
132+
unsigned LineNo = 1;
138133
StringRef Section = "*";
134+
139135
for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
136+
*I = I->trim();
140137
// Ignore empty lines and lines starting with "#"
141138
if (I->empty() || I->startswith("#"))
142139
continue;
@@ -181,19 +178,18 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
181178
if (SectionsMap.find(Section) == SectionsMap.end()) {
182179
std::unique_ptr<Matcher> M = make_unique<Matcher>();
183180
std::string REError;
184-
if (!M->insert(Section, REError)) {
181+
if (!M->insert(Section, LineNo, REError)) {
185182
Error = (Twine("malformed section ") + Section + ": '" + REError).str();
186183
return false;
187184
}
188-
M->compile();
189185

190186
SectionsMap[Section] = Sections.size();
191187
Sections.emplace_back(std::move(M));
192188
}
193189

194190
auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category];
195191
std::string REError;
196-
if (!Entry.insert(std::move(Regexp), REError)) {
192+
if (!Entry.insert(std::move(Regexp), LineNo, REError)) {
197193
Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
198194
SplitLine.second + "': " + REError).str();
199195
return false;
@@ -202,38 +198,33 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
202198
return true;
203199
}
204200

205-
void SpecialCaseList::compile() {
206-
assert(!IsCompiled && "compile() should only be called once");
207-
// Iterate through every section compiling regular expressions for every query
208-
// and creating Section entries.
209-
for (auto &Section : Sections)
210-
for (auto &Prefix : Section.Entries)
211-
for (auto &Category : Prefix.getValue())
212-
Category.getValue().compile();
213-
214-
IsCompiled = true;
215-
}
216-
217201
SpecialCaseList::~SpecialCaseList() {}
218202

219203
bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
220204
StringRef Query, StringRef Category) const {
221-
assert(IsCompiled && "SpecialCaseList::compile() was not called!");
205+
return inSectionBlame(Section, Prefix, Query, Category);
206+
}
222207

208+
unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
209+
StringRef Query,
210+
StringRef Category) const {
223211
for (auto &SectionIter : Sections)
224-
if (SectionIter.SectionMatcher->match(Section) &&
225-
inSection(SectionIter.Entries, Prefix, Query, Category))
226-
return true;
227-
228-
return false;
212+
if (SectionIter.SectionMatcher->match(Section)) {
213+
unsigned Blame =
214+
inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
215+
if (Blame)
216+
return Blame;
217+
}
218+
return 0;
229219
}
230220

231-
bool SpecialCaseList::inSection(const SectionEntries &Entries, StringRef Prefix,
232-
StringRef Query, StringRef Category) const {
221+
unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
222+
StringRef Prefix, StringRef Query,
223+
StringRef Category) const {
233224
SectionEntries::const_iterator I = Entries.find(Prefix);
234-
if (I == Entries.end()) return false;
225+
if (I == Entries.end()) return 0;
235226
StringMap<Matcher>::const_iterator II = I->second.find(Category);
236-
if (II == I->second.end()) return false;
227+
if (II == I->second.end()) return 0;
237228

238229
return II->getValue().match(Query);
239230
}

‎llvm/unittests/Support/SpecialCaseListTest.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,30 @@ TEST_F(SpecialCaseListTest, Basic) {
5858
EXPECT_FALSE(SCL->inSection("", "src", "hi"));
5959
EXPECT_FALSE(SCL->inSection("", "fun", "hello"));
6060
EXPECT_FALSE(SCL->inSection("", "src", "hello", "category"));
61+
62+
EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello"));
63+
EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye"));
64+
EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category"));
65+
EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category"));
66+
EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi"));
67+
EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello"));
68+
EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category"));
69+
}
70+
71+
TEST_F(SpecialCaseListTest, CorrectErrorLineNumberWithBlankLine) {
72+
std::string Error;
73+
EXPECT_EQ(nullptr, makeSpecialCaseList("# This is a comment.\n"
74+
"\n"
75+
"[not valid\n",
76+
Error));
77+
EXPECT_TRUE(
78+
((StringRef)Error).startswith("malformed section header on line 3:"));
79+
80+
EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n"
81+
"[not valid\n",
82+
Error));
83+
EXPECT_TRUE(
84+
((StringRef)Error).startswith("malformed section header on line 4:"));
6185
}
6286

6387
TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) {

0 commit comments

Comments
 (0)
Please sign in to comment.