Skip to content

Commit cd207f1

Browse files
committedDec 5, 2018
[clang-tidy] new check: bugprone-branch-clone
Summary: Implement a check for detecting if/else if/else chains where two or more branches are Type I clones of each other (that is, they contain identical code) and for detecting switch statements where two or more consecutive branches are Type I clones of each other. Patch by donat.nagy. Reviewers: alexfh, hokein, aaron.ballman, JonasToth Reviewed By: JonasToth Subscribers: MTC, lebedev.ri, whisperity, xazax.hun, Eugene.Zelenko, mgorny, rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits Tags: #clang-tools-extra Differential Revision: https://reviews.llvm.org/D54757 llvm-svn: 348343
1 parent 45562a3 commit cd207f1

File tree

8 files changed

+1373
-0
lines changed

8 files changed

+1373
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#include "BranchCloneCheck.h"
11+
#include "clang/AST/ASTContext.h"
12+
#include "clang/ASTMatchers/ASTMatchFinder.h"
13+
#include "clang/Analysis/CloneDetection.h"
14+
#include "llvm/Support/Casting.h"
15+
16+
using namespace clang;
17+
using namespace clang::ast_matchers;
18+
19+
/// Returns true when the statements are Type I clones of each other.
20+
static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS,
21+
const ASTContext &Context) {
22+
llvm::FoldingSetNodeID DataLHS, DataRHS;
23+
LHS->Profile(DataLHS, Context, false);
24+
RHS->Profile(DataRHS, Context, false);
25+
return (DataLHS == DataRHS);
26+
}
27+
28+
namespace {
29+
/// A branch in a switch may consist of several statements; while a branch in
30+
/// an if/else if/else chain is one statement (which may be a CompoundStmt).
31+
using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
32+
} // anonymous namespace
33+
34+
/// Determines if the bodies of two branches in a switch statements are Type I
35+
/// clones of each other. This function only examines the body of the branch
36+
/// and ignores the `case X:` or `default:` at the start of the branch.
37+
static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
38+
const SwitchBranch RHS,
39+
const ASTContext &Context) {
40+
if (LHS.size() != RHS.size())
41+
return false;
42+
43+
for (size_t i = 0, Size = LHS.size(); i < Size; i++) {
44+
// NOTE: We strip goto labels and annotations in addition to stripping
45+
// the `case X:` or `default:` labels, but it is very unlikely that this
46+
// would casue false positives in real-world code.
47+
if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(),
48+
RHS[i]->stripLabelLikeStatements(), Context)) {
49+
return false;
50+
}
51+
}
52+
53+
return true;
54+
}
55+
56+
namespace clang {
57+
namespace tidy {
58+
namespace bugprone {
59+
60+
void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
61+
Finder->addMatcher(
62+
ifStmt(stmt().bind("if"),
63+
hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
64+
hasElse(stmt().bind("else"))),
65+
this);
66+
Finder->addMatcher(switchStmt().bind("switch"), this);
67+
Finder->addMatcher(conditionalOperator().bind("condOp"), this);
68+
}
69+
70+
void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
71+
const ASTContext &Context = *Result.Context;
72+
73+
if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
74+
const Stmt *Then = IS->getThen();
75+
assert(Then && "An IfStmt must have a `then` branch!");
76+
77+
const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
78+
assert(Else && "We only look for `if` statements with an `else` branch!");
79+
80+
if (!isa<IfStmt>(Else)) {
81+
// Just a simple if with no `else if` branch.
82+
if (areStatementsIdentical(Then->IgnoreContainers(),
83+
Else->IgnoreContainers(), Context)) {
84+
diag(IS->getBeginLoc(), "if with identical then and else branches");
85+
diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
86+
}
87+
return;
88+
}
89+
90+
// This is the complicated case when we start an if/else if/else chain.
91+
// To find all the duplicates, we collect all the branches into a vector.
92+
llvm::SmallVector<const Stmt *, 4> Branches;
93+
const IfStmt *Cur = IS;
94+
while (true) {
95+
// Store the `then` branch.
96+
Branches.push_back(Cur->getThen());
97+
98+
Else = Cur->getElse();
99+
// The chain ends if there is no `else` branch.
100+
if (!Else)
101+
break;
102+
103+
// Check if there is another `else if`...
104+
Cur = dyn_cast<IfStmt>(Else);
105+
if (!Cur) {
106+
// ...this is just a plain `else` branch at the end of the chain.
107+
Branches.push_back(Else);
108+
break;
109+
}
110+
}
111+
112+
size_t N = Branches.size();
113+
llvm::BitVector KnownAsClone(N);
114+
115+
for (size_t i = 0; i + 1 < N; i++) {
116+
// We have already seen Branches[i] as a clone of an earlier branch.
117+
if (KnownAsClone[i])
118+
continue;
119+
120+
int NumCopies = 1;
121+
122+
for (size_t j = i + 1; j < N; j++) {
123+
if (KnownAsClone[j] ||
124+
!areStatementsIdentical(Branches[i]->IgnoreContainers(),
125+
Branches[j]->IgnoreContainers(), Context))
126+
continue;
127+
128+
NumCopies++;
129+
KnownAsClone[j] = true;
130+
131+
if (NumCopies == 2) {
132+
// We report the first occurence only when we find the second one.
133+
diag(Branches[i]->getBeginLoc(),
134+
"repeated branch in conditional chain");
135+
diag(Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
136+
*Result.SourceManager, getLangOpts()),
137+
"end of the original", DiagnosticIDs::Note);
138+
}
139+
140+
diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
141+
DiagnosticIDs::Note)
142+
<< (NumCopies - 1);
143+
}
144+
}
145+
return;
146+
}
147+
148+
if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
149+
// We do not try to detect chains of ?: operators.
150+
if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context))
151+
diag(CO->getQuestionLoc(),
152+
"conditional operator with identical true and false expressions");
153+
154+
return;
155+
}
156+
157+
if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
158+
const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
159+
160+
// Code like
161+
// switch (x) case 0: case 1: foobar();
162+
// is legal and calls foobar() if and only if x is either 0 or 1;
163+
// but we do not try to distinguish branches in such code.
164+
if (!Body)
165+
return;
166+
167+
// We will first collect the branches of the switch statements. For the
168+
// sake of simplicity we say that branches are delimited by the SwitchCase
169+
// (`case:` or `default:`) children of Body; that is, we ignore `case:` or
170+
// `default:` labels embedded inside other statements and we do not follow
171+
// the effects of `break` and other manipulation of the control-flow.
172+
llvm::SmallVector<SwitchBranch, 4> Branches;
173+
for (const Stmt *S : Body->body()) {
174+
// If this is a `case` or `default`, we start a new, empty branch.
175+
if (isa<SwitchCase>(S))
176+
Branches.emplace_back();
177+
178+
// There may be code before the first branch (which can be dead code
179+
// and can be code reached either through goto or through case labels
180+
// that are embedded inside e.g. inner compound statements); we do not
181+
// store those statements in branches.
182+
if (!Branches.empty())
183+
Branches.back().push_back(S);
184+
}
185+
186+
auto End = Branches.end();
187+
auto BeginCurrent = Branches.begin();
188+
while (BeginCurrent < End) {
189+
auto EndCurrent = BeginCurrent + 1;
190+
while (EndCurrent < End &&
191+
areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
192+
++EndCurrent;
193+
}
194+
// At this point the iterator range {BeginCurrent, EndCurrent} contains a
195+
// complete family of consecutive identical branches.
196+
if (EndCurrent > BeginCurrent + 1) {
197+
diag(BeginCurrent->front()->getBeginLoc(),
198+
"switch has %0 consecutive identical branches")
199+
<< static_cast<int>(std::distance(BeginCurrent, EndCurrent));
200+
diag(Lexer::getLocForEndOfToken((EndCurrent - 1)->back()->getEndLoc(),
201+
0, *Result.SourceManager,
202+
getLangOpts()),
203+
"last of these clones ends here", DiagnosticIDs::Note);
204+
}
205+
BeginCurrent = EndCurrent;
206+
}
207+
return;
208+
}
209+
210+
llvm_unreachable("No if statement and no switch statement.");
211+
}
212+
213+
} // namespace bugprone
214+
} // namespace tidy
215+
} // namespace clang
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===--- BranchCloneCheck.h - clang-tidy ------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_BRANCHCLONECHECK_H
11+
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_BRANCHCLONECHECK_H
12+
13+
#include "../ClangTidy.h"
14+
15+
namespace clang {
16+
namespace tidy {
17+
namespace bugprone {
18+
19+
/// A check for detecting if/else if/else chains where two or more branches are
20+
/// Type I clones of each other (that is, they contain identical code), for
21+
/// detecting switch statements where two or more consecutive branches are
22+
/// Type I clones of each other, and for detecting conditional operators where
23+
/// the true and false expressions are Type I clones of each other.
24+
///
25+
/// For the user-facing documentation see:
26+
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-branch-clone.html
27+
class BranchCloneCheck : public ClangTidyCheck {
28+
public:
29+
BranchCloneCheck(StringRef Name, ClangTidyContext *Context)
30+
: ClangTidyCheck(Name, Context) {}
31+
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
32+
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
33+
};
34+
35+
} // namespace bugprone
36+
} // namespace tidy
37+
} // namespace clang
38+
39+
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_BRANCHCLONECHECK_H

‎clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "ArgumentCommentCheck.h"
1515
#include "AssertSideEffectCheck.h"
1616
#include "BoolPointerImplicitConversionCheck.h"
17+
#include "BranchCloneCheck.h"
1718
#include "CopyConstructorInitCheck.h"
1819
#include "DanglingHandleCheck.h"
1920
#include "ExceptionEscapeCheck.h"
@@ -65,6 +66,8 @@ class BugproneModule : public ClangTidyModule {
6566
"bugprone-assert-side-effect");
6667
CheckFactories.registerCheck<BoolPointerImplicitConversionCheck>(
6768
"bugprone-bool-pointer-implicit-conversion");
69+
CheckFactories.registerCheck<BranchCloneCheck>(
70+
"bugprone-branch-clone");
6871
CheckFactories.registerCheck<CopyConstructorInitCheck>(
6972
"bugprone-copy-constructor-init");
7073
CheckFactories.registerCheck<DanglingHandleCheck>(

‎clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_clang_library(clangTidyBugproneModule
44
ArgumentCommentCheck.cpp
55
AssertSideEffectCheck.cpp
66
BoolPointerImplicitConversionCheck.cpp
7+
BranchCloneCheck.cpp
78
BugproneTidyModule.cpp
89
CopyConstructorInitCheck.cpp
910
DanglingHandleCheck.cpp

‎clang-tools-extra/docs/ReleaseNotes.rst

+7
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,13 @@ Improvements to clang-tidy
122122
Flags uses of ``absl::StrCat()`` to append to a ``std::string``. Suggests
123123
``absl::StrAppend()`` should be used instead.
124124

125+
- New :doc:`bugprone-branch-clone
126+
<clang-tidy/checks/bugprone-branch-clone>` check.
127+
128+
Checks for repeated branches in ``if/else if/else`` chains, consecutive
129+
repeated branches in ``switch`` statements and indentical true and false
130+
branches in conditional operators.
131+
125132
- New :doc:`bugprone-too-small-loop-variable
126133
<clang-tidy/checks/bugprone-too-small-loop-variable>` check.
127134

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
.. title:: clang-tidy - bugprone-branch-clone
2+
3+
bugprone-branch-clone
4+
=====================
5+
6+
Checks for repeated branches in ``if/else if/else`` chains, consecutive
7+
repeated branches in ``switch`` statements and indentical true and false
8+
branches in conditional operators.
9+
10+
.. code-block:: c++
11+
12+
if (test_value(x)) {
13+
y++;
14+
do_something(x, y);
15+
} else {
16+
y++;
17+
do_something(x, y);
18+
}
19+
20+
In this simple example (which could arise e.g. as a copy-paste error) the
21+
``then`` and ``else`` branches are identical and the code is equivalent the
22+
following shorter and cleaner code:
23+
24+
.. code-block:: c++
25+
26+
test_value(x); // can be omitted unless it has side effects
27+
y++;
28+
do_something(x, y);
29+
30+
31+
If this is the inteded behavior, then there is no reason to use a conditional
32+
statement; otherwise the issue can be solved by fixing the branch that is
33+
handled incorrectly.
34+
35+
The check also detects repeated branches in longer ``if/else if/else`` chains
36+
where it would be even harder to notice the problem.
37+
38+
In ``switch`` statements the check only reports repeated branches when they are
39+
consecutive, because it is relatively common that the ``case:`` labels have
40+
some natural ordering and rearranging them would decrease the readability of
41+
the code. For example:
42+
43+
.. code-block:: c++
44+
45+
switch (ch) {
46+
case 'a':
47+
return 10;
48+
case 'A':
49+
return 10;
50+
case 'b':
51+
return 11;
52+
case 'B':
53+
return 11;
54+
default:
55+
return 10;
56+
}
57+
58+
Here the check reports that the ``'a'`` and ``'A'`` branches are identical
59+
(and that the ``'b'`` and ``'B'`` branches are also identical), but does not
60+
report that the ``default:`` branch is also idenical to the first two branches.
61+
If this is indeed the correct behavior, then it could be implemented as:
62+
63+
.. code-block:: c++
64+
65+
switch (ch) {
66+
case 'a':
67+
case 'A':
68+
return 10;
69+
case 'b':
70+
case 'B':
71+
return 11;
72+
default:
73+
return 10;
74+
}
75+
76+
Here the check does not warn for the repeated ``return 10;``, which is good if
77+
we want to preserve that ``'a'`` is before ``'b'`` and ``default:`` is the last
78+
branch.
79+
80+
Finally, the check also examines conditional operators and reports code like:
81+
82+
.. code-block:: c++
83+
84+
return test_value(x) ? x : x;
85+
86+
Unlike if statements, the check does not detect chains of conditional
87+
operators.
88+
89+
Note: This check also reports situations where branches become identical only
90+
after preprocession.

‎clang-tools-extra/docs/clang-tidy/checks/list.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Clang-Tidy Checks
3131
bugprone-argument-comment
3232
bugprone-assert-side-effect
3333
bugprone-bool-pointer-implicit-conversion
34+
bugprone-branch-clone
3435
bugprone-copy-constructor-init
3536
bugprone-dangling-handle
3637
bugprone-exception-escape

‎clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp

+1,017
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.