|
| 1 | +//===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// |
| 2 | +// |
| 3 | +// The LLVM Compiler Infrastructure |
| 4 | +// |
| 5 | +// This file is distributed under the University of Illinois Open Source |
| 6 | +// License. See LICENSE.TXT for details. |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | + |
| 10 | +#include "BranchCloneCheck.h" |
| 11 | +#include "clang/AST/ASTContext.h" |
| 12 | +#include "clang/ASTMatchers/ASTMatchFinder.h" |
| 13 | +#include "clang/Analysis/CloneDetection.h" |
| 14 | +#include "llvm/Support/Casting.h" |
| 15 | + |
| 16 | +using namespace clang; |
| 17 | +using namespace clang::ast_matchers; |
| 18 | + |
| 19 | +/// Returns true when the statements are Type I clones of each other. |
| 20 | +static bool areStatementsIdentical(const Stmt *LHS, const Stmt *RHS, |
| 21 | + const ASTContext &Context) { |
| 22 | + llvm::FoldingSetNodeID DataLHS, DataRHS; |
| 23 | + LHS->Profile(DataLHS, Context, false); |
| 24 | + RHS->Profile(DataRHS, Context, false); |
| 25 | + return (DataLHS == DataRHS); |
| 26 | +} |
| 27 | + |
| 28 | +namespace { |
| 29 | +/// A branch in a switch may consist of several statements; while a branch in |
| 30 | +/// an if/else if/else chain is one statement (which may be a CompoundStmt). |
| 31 | +using SwitchBranch = llvm::SmallVector<const Stmt *, 2>; |
| 32 | +} // anonymous namespace |
| 33 | + |
| 34 | +/// Determines if the bodies of two branches in a switch statements are Type I |
| 35 | +/// clones of each other. This function only examines the body of the branch |
| 36 | +/// and ignores the `case X:` or `default:` at the start of the branch. |
| 37 | +static bool areSwitchBranchesIdentical(const SwitchBranch LHS, |
| 38 | + const SwitchBranch RHS, |
| 39 | + const ASTContext &Context) { |
| 40 | + if (LHS.size() != RHS.size()) |
| 41 | + return false; |
| 42 | + |
| 43 | + for (size_t i = 0, Size = LHS.size(); i < Size; i++) { |
| 44 | + // NOTE: We strip goto labels and annotations in addition to stripping |
| 45 | + // the `case X:` or `default:` labels, but it is very unlikely that this |
| 46 | + // would casue false positives in real-world code. |
| 47 | + if (!areStatementsIdentical(LHS[i]->stripLabelLikeStatements(), |
| 48 | + RHS[i]->stripLabelLikeStatements(), Context)) { |
| 49 | + return false; |
| 50 | + } |
| 51 | + } |
| 52 | + |
| 53 | + return true; |
| 54 | +} |
| 55 | + |
| 56 | +namespace clang { |
| 57 | +namespace tidy { |
| 58 | +namespace bugprone { |
| 59 | + |
| 60 | +void BranchCloneCheck::registerMatchers(MatchFinder *Finder) { |
| 61 | + Finder->addMatcher( |
| 62 | + ifStmt(stmt().bind("if"), |
| 63 | + hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))), |
| 64 | + hasElse(stmt().bind("else"))), |
| 65 | + this); |
| 66 | + Finder->addMatcher(switchStmt().bind("switch"), this); |
| 67 | + Finder->addMatcher(conditionalOperator().bind("condOp"), this); |
| 68 | +} |
| 69 | + |
| 70 | +void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) { |
| 71 | + const ASTContext &Context = *Result.Context; |
| 72 | + |
| 73 | + if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) { |
| 74 | + const Stmt *Then = IS->getThen(); |
| 75 | + assert(Then && "An IfStmt must have a `then` branch!"); |
| 76 | + |
| 77 | + const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else"); |
| 78 | + assert(Else && "We only look for `if` statements with an `else` branch!"); |
| 79 | + |
| 80 | + if (!isa<IfStmt>(Else)) { |
| 81 | + // Just a simple if with no `else if` branch. |
| 82 | + if (areStatementsIdentical(Then->IgnoreContainers(), |
| 83 | + Else->IgnoreContainers(), Context)) { |
| 84 | + diag(IS->getBeginLoc(), "if with identical then and else branches"); |
| 85 | + diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note); |
| 86 | + } |
| 87 | + return; |
| 88 | + } |
| 89 | + |
| 90 | + // This is the complicated case when we start an if/else if/else chain. |
| 91 | + // To find all the duplicates, we collect all the branches into a vector. |
| 92 | + llvm::SmallVector<const Stmt *, 4> Branches; |
| 93 | + const IfStmt *Cur = IS; |
| 94 | + while (true) { |
| 95 | + // Store the `then` branch. |
| 96 | + Branches.push_back(Cur->getThen()); |
| 97 | + |
| 98 | + Else = Cur->getElse(); |
| 99 | + // The chain ends if there is no `else` branch. |
| 100 | + if (!Else) |
| 101 | + break; |
| 102 | + |
| 103 | + // Check if there is another `else if`... |
| 104 | + Cur = dyn_cast<IfStmt>(Else); |
| 105 | + if (!Cur) { |
| 106 | + // ...this is just a plain `else` branch at the end of the chain. |
| 107 | + Branches.push_back(Else); |
| 108 | + break; |
| 109 | + } |
| 110 | + } |
| 111 | + |
| 112 | + size_t N = Branches.size(); |
| 113 | + llvm::BitVector KnownAsClone(N); |
| 114 | + |
| 115 | + for (size_t i = 0; i + 1 < N; i++) { |
| 116 | + // We have already seen Branches[i] as a clone of an earlier branch. |
| 117 | + if (KnownAsClone[i]) |
| 118 | + continue; |
| 119 | + |
| 120 | + int NumCopies = 1; |
| 121 | + |
| 122 | + for (size_t j = i + 1; j < N; j++) { |
| 123 | + if (KnownAsClone[j] || |
| 124 | + !areStatementsIdentical(Branches[i]->IgnoreContainers(), |
| 125 | + Branches[j]->IgnoreContainers(), Context)) |
| 126 | + continue; |
| 127 | + |
| 128 | + NumCopies++; |
| 129 | + KnownAsClone[j] = true; |
| 130 | + |
| 131 | + if (NumCopies == 2) { |
| 132 | + // We report the first occurence only when we find the second one. |
| 133 | + diag(Branches[i]->getBeginLoc(), |
| 134 | + "repeated branch in conditional chain"); |
| 135 | + diag(Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0, |
| 136 | + *Result.SourceManager, getLangOpts()), |
| 137 | + "end of the original", DiagnosticIDs::Note); |
| 138 | + } |
| 139 | + |
| 140 | + diag(Branches[j]->getBeginLoc(), "clone %0 starts here", |
| 141 | + DiagnosticIDs::Note) |
| 142 | + << (NumCopies - 1); |
| 143 | + } |
| 144 | + } |
| 145 | + return; |
| 146 | + } |
| 147 | + |
| 148 | + if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) { |
| 149 | + // We do not try to detect chains of ?: operators. |
| 150 | + if (areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), Context)) |
| 151 | + diag(CO->getQuestionLoc(), |
| 152 | + "conditional operator with identical true and false expressions"); |
| 153 | + |
| 154 | + return; |
| 155 | + } |
| 156 | + |
| 157 | + if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) { |
| 158 | + const CompoundStmt *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody()); |
| 159 | + |
| 160 | + // Code like |
| 161 | + // switch (x) case 0: case 1: foobar(); |
| 162 | + // is legal and calls foobar() if and only if x is either 0 or 1; |
| 163 | + // but we do not try to distinguish branches in such code. |
| 164 | + if (!Body) |
| 165 | + return; |
| 166 | + |
| 167 | + // We will first collect the branches of the switch statements. For the |
| 168 | + // sake of simplicity we say that branches are delimited by the SwitchCase |
| 169 | + // (`case:` or `default:`) children of Body; that is, we ignore `case:` or |
| 170 | + // `default:` labels embedded inside other statements and we do not follow |
| 171 | + // the effects of `break` and other manipulation of the control-flow. |
| 172 | + llvm::SmallVector<SwitchBranch, 4> Branches; |
| 173 | + for (const Stmt *S : Body->body()) { |
| 174 | + // If this is a `case` or `default`, we start a new, empty branch. |
| 175 | + if (isa<SwitchCase>(S)) |
| 176 | + Branches.emplace_back(); |
| 177 | + |
| 178 | + // There may be code before the first branch (which can be dead code |
| 179 | + // and can be code reached either through goto or through case labels |
| 180 | + // that are embedded inside e.g. inner compound statements); we do not |
| 181 | + // store those statements in branches. |
| 182 | + if (!Branches.empty()) |
| 183 | + Branches.back().push_back(S); |
| 184 | + } |
| 185 | + |
| 186 | + auto End = Branches.end(); |
| 187 | + auto BeginCurrent = Branches.begin(); |
| 188 | + while (BeginCurrent < End) { |
| 189 | + auto EndCurrent = BeginCurrent + 1; |
| 190 | + while (EndCurrent < End && |
| 191 | + areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) { |
| 192 | + ++EndCurrent; |
| 193 | + } |
| 194 | + // At this point the iterator range {BeginCurrent, EndCurrent} contains a |
| 195 | + // complete family of consecutive identical branches. |
| 196 | + if (EndCurrent > BeginCurrent + 1) { |
| 197 | + diag(BeginCurrent->front()->getBeginLoc(), |
| 198 | + "switch has %0 consecutive identical branches") |
| 199 | + << static_cast<int>(std::distance(BeginCurrent, EndCurrent)); |
| 200 | + diag(Lexer::getLocForEndOfToken((EndCurrent - 1)->back()->getEndLoc(), |
| 201 | + 0, *Result.SourceManager, |
| 202 | + getLangOpts()), |
| 203 | + "last of these clones ends here", DiagnosticIDs::Note); |
| 204 | + } |
| 205 | + BeginCurrent = EndCurrent; |
| 206 | + } |
| 207 | + return; |
| 208 | + } |
| 209 | + |
| 210 | + llvm_unreachable("No if statement and no switch statement."); |
| 211 | +} |
| 212 | + |
| 213 | +} // namespace bugprone |
| 214 | +} // namespace tidy |
| 215 | +} // namespace clang |
0 commit comments