Index: include/clang/Analysis/CloneDetection.h =================================================================== --- include/clang/Analysis/CloneDetection.h +++ include/clang/Analysis/CloneDetection.h @@ -24,6 +24,7 @@ class Stmt; class Decl; +class VarDecl; class ASTContext; class CompoundStmt; @@ -222,7 +223,43 @@ /// that were identified to be clones of each other. /// \param MinGroupComplexity Only return clones which have at least this /// complexity value. - void findClones(std::vector &Result, unsigned MinGroupComplexity); + /// \param CheckPatterns Returns only clone groups in which the referenced + /// variables follow the same pattern. + void findClones(std::vector &Result, unsigned MinGroupComplexity, + bool CheckPatterns = true); + + /// \brief Describes two clones that reference their variables in a different + /// pattern which could indicate a programming error. + struct SuspiciousClonePair { + /// \brief Utility class holding the relevant information about a single + /// clone in this pair. + struct SuspiciousClone { + /// The variable which referencing in this clone was against the pattern. + const VarDecl *Variable; + /// Where the variable was referenced. + SourceRange Location; + /// The variable that should have been referenced to follow the pattern. + /// If Suggestion is a nullptr then it's not possible to fix the pattern + /// by referencing a different variable in this clone. + const VarDecl *Suggestion; + SuspiciousClone(const VarDecl *Variable, SourceRange Location, + const VarDecl *Suggestion) + : Variable(Variable), Location(Location), Suggestion(Suggestion) {} + SuspiciousClone() {} + }; + /// The first clone in the pair which always has a suggested variable. + SuspiciousClone FirstClone; + /// This other clone in the pair which can have a suggested variable. + SuspiciousClone SecondClone; + }; + + /// \brief Searches the provided statements for pairs of clones that don't + /// follow the same pattern when referencing variables. + /// \param Result Output parameter that will contain the clone pairs. + /// \param MinGroupComplexity Only clone pairs in which the clones have at + /// least this complexity value. + void findSuspiciousClones(std::vector &Result, + unsigned MinGroupComplexity); private: /// Stores all found clone groups including invalid groups with only a single Index: lib/Analysis/CloneDetection.cpp =================================================================== --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -88,8 +88,11 @@ struct VariableOccurence { /// The index of the associated VarDecl in the Variables vector. size_t KindID; + /// The location in the code where the variable was references. + SourceRange Location; - VariableOccurence(size_t KindID) : KindID(KindID) {} + VariableOccurence(size_t KindID, SourceRange Location) + : KindID(KindID), Location(Location) {} }; /// All occurences of referenced variables in the order of appearance. @@ -100,19 +103,20 @@ /// \brief Adds a new variable referenced to this pattern. /// \param VarDecl The declaration of the variable that is referenced. - void addVariableOccurence(const VarDecl *VarDecl) { + /// \param Location The SourceRange where this variable is referenced. + void addVariableOccurence(const VarDecl *VarDecl, SourceRange Location) { // First check if we already reference this variable for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { if (Variables[KindIndex] == VarDecl) { // If yes, add a new occurence that points to the existing entry in // the Variables vector. - Occurences.emplace_back(KindIndex); + Occurences.emplace_back(KindIndex, Location); return; } } // If this variable wasn't already referenced, add it to the list of // referenced variables and add a occurence that points to this new entry. - Occurences.emplace_back(Variables.size()); + Occurences.emplace_back(Variables.size(), Location); Variables.push_back(VarDecl); } @@ -127,7 +131,7 @@ // Check if S is a reference to a variable. If yes, add it to the pattern. if (auto D = dyn_cast(S)) { if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl())) - addVariableOccurence(VD); + addVariableOccurence(VD, D->getSourceRange()); } // Recursively check all children of the given statement. @@ -146,31 +150,89 @@ /// \brief Compares this pattern with the given one. /// \param Other The given VariablePattern to compare with. - /// \return Returns true if and only if the references variables in this - /// object follow the same pattern than the ones in the given - /// VariablePattern. + /// \param FirstMismatch Output parameter that will be filled with information + /// about the first difference between the two patterns. This parameter + /// can be a nullptr, in which case it will be ignored. + /// \return Returns the number of differences between the pattern this object + /// is following and the given VariablePattern. /// - /// For example, the following statements all have the same pattern: + /// For example, the following statements all have the same pattern and this + /// function would return zero: /// /// if (a < b) return a; return b; /// if (x < y) return x; return y; /// if (u2 < u1) return u2; return u1; /// - /// but the following statement has a different pattern (note the changed - /// variables in the return statements). + /// But the following statement has a different pattern (note the changed + /// variables in the return statements) and would have two differences when + /// compared with one of the statements above. /// /// if (a < b) return b; return a; /// /// This function should only be called if the related statements of the given /// pattern and the statements of this objects are clones of each other. - bool comparePattern(const VariablePattern &Other) { + unsigned getPatternDifferences( + const VariablePattern &Other, + CloneDetector::SuspiciousClonePair *FirstMismatch = nullptr) { + unsigned NumberOfDifferences = 0; + assert(Other.Occurences.size() == Occurences.size()); for (unsigned i = 0; i < Occurences.size(); ++i) { - if (Occurences[i].KindID != Other.Occurences[i].KindID) { - return false; + auto ThisOccurence = Occurences[i]; + auto OtherOccurence = Other.Occurences[i]; + if (ThisOccurence.KindID != OtherOccurence.KindID) { + ++NumberOfDifferences; + + // If FirstMismatch is not a nullptr, we need to store information about + // the first difference between the two patterns. + if (FirstMismatch == nullptr) + continue; + + // Only proceed if we just found the first difference as we only store + // information about the first difference. + if (NumberOfDifferences != 1) + continue; + + const VarDecl *FirstSuggestion = nullptr; + // If there is a variable available in the list of referenced variables + // which wouldn't break the pattern if it is used in place of the + // current variable, we provide this variable as the suggested fix. + if (OtherOccurence.KindID < Variables.size()) + FirstSuggestion = Variables[OtherOccurence.KindID]; + + // Store information about the first clone. + FirstMismatch->FirstClone = + CloneDetector::SuspiciousClonePair::SuspiciousClone( + Variables[ThisOccurence.KindID], ThisOccurence.Location, + FirstSuggestion); + + // Same as above but with the other clone. We do this for both clones as + // we don't know which clone is the one containing the unintended + // pattern error. + const VarDecl *SecondSuggestion = nullptr; + if (ThisOccurence.KindID < Other.Variables.size()) + SecondSuggestion = Other.Variables[ThisOccurence.KindID]; + + // Store information about the second clone. + FirstMismatch->SecondClone = + CloneDetector::SuspiciousClonePair::SuspiciousClone( + Variables[ThisOccurence.KindID], OtherOccurence.Location, + SecondSuggestion); + + // SuspiciousClonePair guarantees that the first clone always has a + // suggested variable associated with it. As we know that one of the two + // clones in the pair always has suggestion, we swap the two clones + // in case the first clone has no suggested variable which means that + // the second clone has a suggested variable and should be first. + if (!FirstMismatch->FirstClone.Suggestion) + std::swap(FirstMismatch->FirstClone, FirstMismatch->SecondClone); + + // This ensures that we always have at least one suggestion in a pair. + assert(FirstMismatch->FirstClone.Suggestion); } } - return true; + + return NumberOfDifferences; } }; } @@ -526,7 +588,8 @@ // and assign them to our new clone group. auto I = UnassignedSequences.begin(), E = UnassignedSequences.end(); while (I != E) { - if (VariablePattern(*I).comparePattern(PrototypeFeatures)) { + + if (VariablePattern(*I).getPatternDifferences(PrototypeFeatures) == 0) { FilteredGroup.Sequences.push_back(*I); I = UnassignedSequences.erase(I); continue; @@ -543,11 +606,15 @@ } void CloneDetector::findClones(std::vector &Result, - unsigned MinGroupComplexity) { + unsigned MinGroupComplexity, + bool CheckPatterns) { // Add every valid clone group that fulfills the complexity requirement. for (const CloneGroup &Group : CloneGroups) { if (Group.isValid() && Group.Complexity >= MinGroupComplexity) { - createCloneGroups(Result, Group); + if (CheckPatterns) + createCloneGroups(Result, Group); + else + Result.push_back(Group); } } @@ -577,3 +644,37 @@ Result.erase(Result.begin() + *I); } } + +void CloneDetector::findSuspiciousClones( + std::vector &Result, + unsigned MinGroupComplexity) { + std::vector Clones; + // Reuse the normal search for clones but specify that the clone groups don't + // need to have a common referenced variable pattern so that we can manually + // search for the kind of pattern errors this function is supposed to find. + findClones(Clones, MinGroupComplexity, false); + + for (const CloneGroup &Group : Clones) { + for (unsigned i = 0; i < Group.Sequences.size(); ++i) { + VariablePattern PatternA(Group.Sequences[i]); + + for (unsigned j = i + 1; j < Group.Sequences.size(); ++j) { + VariablePattern PatternB(Group.Sequences[j]); + + CloneDetector::SuspiciousClonePair ClonePair; + // For now, we only report clones which break the variable pattern just + // once because multiple differences in a pattern are an indicator that + // those differences are maybe intended (e.g. because it's actually + // a different algorithm). + // TODO: In very big clones even multiple variables can be unintended, + // so replacing this number with a percentage could better handle such + // cases. On the other hand it could increase the false-positive rate + // for all clones if the percentage is too high. + if (PatternA.getPatternDifferences(PatternB, &ClonePair) == 1) { + Result.push_back(ClonePair); + break; + } + } + } + } +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/CloneChecker.cpp +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -34,6 +34,83 @@ void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, AnalysisManager &Mgr, BugReporter &BR) const; + + /// \brief Reports all clones to the user. + void reportClones(SourceManager &SM, AnalysisManager &Mgr, + int MinComplexity) const { + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups, MinComplexity); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { + // For readability reasons we sort the clones by line numbers. + std::sort(Group.Sequences.begin(), Group.Sequences.end(), + [&SM](const StmtSequence &LHS, const StmtSequence &RHS) { + return SM.isBeforeInTranslationUnit(LHS.getStartLoc(), + RHS.getStartLoc()) && + SM.isBeforeInTranslationUnit(LHS.getEndLoc(), + RHS.getEndLoc()); + }); + + // We group the clones by printing the first as a warning and all others + // as a note. + DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID); + for (unsigned i = 1; i < Group.Sequences.size(); ++i) { + DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID); + } + } + } + + /// \brief Reports only suspicious clones to the user along with informaton + /// that explain why they are suspicious. + void reportSuspiciousClones(SourceManager &SM, AnalysisManager &Mgr, + int MinComplexity) const { + + std::vector Clones; + CloneDetector.findSuspiciousClones(Clones, MinComplexity); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + auto WarnID = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Warning, "Maybe you wanted to use %0 here?"); + + auto NoteID = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Note, "Suggestion is based on the useage of this " + "variable in a similar piece of code."); + + auto NoteWithSuggestionID = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Note, "Or maybe you wanted to use %0 here in this " + "similar piece of code?"); + + for (CloneDetector::SuspiciousClonePair &Pair : Clones) { + // The first clone always has a suggestion and we report it to the user + // along with the place where the suggestion should be used. + DiagEngine.Report(Pair.FirstClone.Location.getBegin(), WarnID) + << Pair.FirstClone.Location << Pair.FirstClone.Suggestion; + + // The second clone can have a suggestion and if there is one, we report + // that suggestion to the user. + if (Pair.SecondClone.Suggestion) { + DiagEngine.Report(Pair.SecondClone.Location.getBegin(), + NoteWithSuggestionID) + << Pair.SecondClone.Location << Pair.SecondClone.Suggestion; + continue; + } + + // If there isn't a suggestion in the second clone, we only inform the + // user where we got the idea that his code could contain an error. + DiagEngine.Report(Pair.SecondClone.Location.getBegin(), NoteID) + << Pair.SecondClone.Location; + } + } }; } // end anonymous namespace @@ -52,39 +129,19 @@ int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger( "MinimumCloneComplexity", 10, this); - assert(MinComplexity >= 0); - SourceManager &SM = BR.getSourceManager(); - - std::vector CloneGroups; - CloneDetector.findClones(CloneGroups, MinComplexity); - - DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + bool ReportSuspiciousClones = Mgr.getAnalyzerOptions().getBooleanOption( + "ReportSuspiciousClones", true, this); - unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, - "Detected code clone."); + bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption( + "ReportNormalClones", true, this); - unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, - "Related code clone is here."); + if (ReportSuspiciousClones) + reportSuspiciousClones(BR.getSourceManager(), Mgr, MinComplexity); - for (CloneDetector::CloneGroup &Group : CloneGroups) { - // For readability reasons we sort the clones by line numbers. - std::sort(Group.Sequences.begin(), Group.Sequences.end(), - [&SM](const StmtSequence &LHS, const StmtSequence &RHS) { - return SM.isBeforeInTranslationUnit(LHS.getStartLoc(), - RHS.getStartLoc()) && - SM.isBeforeInTranslationUnit(LHS.getEndLoc(), - RHS.getEndLoc()); - }); - - // We group the clones by printing the first as a warning and all others - // as a note. - DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID); - for (unsigned i = 1; i < Group.Sequences.size(); ++i) { - DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID); - } - } + if (ReportNormalClones) + reportClones(BR.getSourceManager(), Mgr, MinComplexity); } //===----------------------------------------------------------------------===// Index: test/Analysis/copypaste/suspicious-clones.cpp =================================================================== --- /dev/null +++ test/Analysis/copypaste/suspicious-clones.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:ReportSuspiciousClones=true -analyzer-config alpha.clone.CloneChecker:ReportNormalClones=false -verify %s + +// Tests finding a suspicious clone that references local variables. + +void log(); + +int max(int a, int b) { + log(); + if (a > b) + return a; + return b; // expected-note{{Suggestion is based on the useage of this variable in a similar piece of code.}} +} + +int maxClone(int x, int y, int z) { + log(); + if (x > y) + return x; + return z; // expected-warning{{Maybe you wanted to use 'y' here?}} +} + +// Tests finding a suspicious clone that references global variables. + +struct mutex { + bool try_lock(); + void unlock(); +}; + +mutex m1; +mutex m2; +int i; + +void busyIncrement() { + while (true) { + if (m1.try_lock()) { + ++i; + m1.unlock(); // expected-note{{Suggestion is based on the useage of this variable in a similar piece of code.}} + if (i > 1000) { + return; + } + } + } +} + +void faultyBusyIncrement() { + while (true) { + if (m1.try_lock()) { + ++i; + m2.unlock(); // expected-warning{{Maybe you wanted to use 'm1' here?}} + if (i > 1000) { + return; + } + } + } +} + +// Tests that we provide two suggestions in cases where two fixes are possible. + +int foo(int a, int b, int c) { + a += b + c; + b /= a + b; + c -= b * a; // expected-warning{{Maybe you wanted to use 'a' here?}} + return c; +} + +int fooClone(int a, int b, int c) { + a += b + c; + b /= a + b; + c -= a * a; // expected-note{{Or maybe you wanted to use 'b' here in this similar piece of code?}} + return c; +} + + +// Tests that for clone groups with a many possible suspicious clone pairs, at +// most one warning per clone group is generated and every relevant clone is +// reported through either a warning or a note. + +long bar1(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = b * b - c; // expected-warning{{Maybe you wanted to use 'c' here?}} + return d; +} + +long bar2(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = c * b - c; // expected-note{{Or maybe you wanted to use 'b' here in this similar piece of code?}} \ + // expected-warning{{Maybe you wanted to use 'a' here?}} + return d; +} + +long bar3(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = a * b - c; // expected-note{{Or maybe you wanted to use 'c' here in this similar piece of code?}} + return d; +}