Diff 68556

include/clang/Analysis/CloneDetection.h

Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines	struct CloneSignature {
/// \brief Holds all relevant data of a StmtSequence.		/// \brief Holds all relevant data of a StmtSequence.
///		///
/// If this variable is equal for two different StmtSequences, then they can		/// If this variable is equal for two different StmtSequences, then they can
/// be considered clones of each other.		/// be considered clones of each other.
std::vector<DataPiece> Data;		std::vector<DataPiece> Data;

/// \brief The complexity of the StmtSequence.		/// \brief The complexity of the StmtSequence.
///		///
/// This scalar value serves as a simple way of filtering clones that are		/// This value gives an approximation of how many direct or indirect child
/// too small to be reported. A greater value indicates that the related		/// statements are contained in the related StmtSequence. In general, the
/// StmtSequence is probably more interesting to the user.		/// greater this value, the greater the number of statements. However, this
		/// is only an approximation and the actual number of statements can be
		/// higher or lower than this value. Statements that are generated by the
		/// compiler (e.g. macro expansions) for example barely influence the
		/// complexity value.
		///
		/// The main purpose of this value is to filter clones that are too small
		/// and therefore probably not interesting enough for the user.
unsigned Complexity;		unsigned Complexity;

/// \brief Creates an empty CloneSignature without any data.		/// \brief Creates an empty CloneSignature without any data.
CloneSignature() : Complexity(1) {}		CloneSignature() : Complexity(1) {}

CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)		CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)
: Data(Data), Complexity(Complexity) {}		: Data(Data), Complexity(Complexity) {}

▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines

lib/Analysis/CloneDetection.cpp

Show All 11 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "clang/Analysis/CloneDetection.h"		#include "clang/Analysis/CloneDetection.h"

#include "clang/AST/ASTContext.h"		#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"		#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"		#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"		#include "clang/AST/StmtVisitor.h"
		#include "clang/Lex/Lexer.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
		#include "llvm/Support/raw_ostream.h"

using namespace clang;		using namespace clang;

StmtSequence::StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,		StmtSequence::StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
unsigned StartIndex, unsigned EndIndex)		unsigned StartIndex, unsigned EndIndex)
: S(Stmt), Context(&Context), StartIndex(StartIndex), EndIndex(EndIndex) {		: S(Stmt), Context(&Context), StartIndex(StartIndex), EndIndex(EndIndex) {
assert(Stmt && "Stmt must not be a nullptr");		assert(Stmt && "Stmt must not be a nullptr");
assert(StartIndex < EndIndex && "Given array should not be empty");		assert(StartIndex < EndIndex && "Given array should not be empty");
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines	for (unsigned i = 0; i < Occurences.size(); ++i) {
return false;		return false;
}		}
}		}
return true;		return true;
}		}
};		};
}		}

		/// \brief Writes the immediate macro name for the given SourceLocation,
		/// followed by a single space, to the given raw_string_ostream.
		///
		/// \param MacroStack The stream that receives the macro name.
		/// \param Context    Supplies the SourceManager and LangOptions used for the
		///                   name lookup.
		/// \param Loc        The location whose immediate macro name is written.
		static void printMacroName(llvm::raw_string_ostream &MacroStack,
		                           ASTContext &Context, SourceLocation Loc) {
		  // The trailing space is a separator: without it, the names of consecutive
		  // macros would run together and could be mistaken for another macro name.
		  MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
		                                             Context.getLangOpts())
		             << " ";
		}

		/// \brief Builds a string that describes every macro expansion that
		/// expanded into the given SourceLocation.
		///
		/// When 'getMacroStack(A) == getMacroStack(B)' holds, the SourceLocations
		/// A and B were expanded from the same macros in the same order.
		///
		/// \param Loc     The location to inspect.
		/// \param Context Supplies the SourceManager used to walk the expansions.
		/// \return The space-terminated macro names, one per expansion level, or an
		///         empty string if \p Loc is not a macro location.
		static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
		  std::string Names;
		  llvm::raw_string_ostream NameStream(Names);
		  SourceManager &SM = Context.getSourceManager();

		  // Step from each macro location to its immediate caller, recording one
		  // macro name per step, until the location is no longer a macro ID.
		  for (; Loc.isMacroID(); Loc = SM.getImmediateMacroCallerLoc(Loc))
		    printMacroName(NameStream, Context, Loc);

		  // Make sure everything written to the stream has reached the string.
		  NameStream.flush();
		  return Names;
		}

namespace {		namespace {
/// \brief Collects the data of a single Stmt.		/// \brief Collects the data of a single Stmt.
///		///
/// This class defines what a code clone is: If it collects for two statements		/// This class defines what a code clone is: If it collects for two statements
/// the same data, then those two statements are considered to be clones of each		/// the same data, then those two statements are considered to be clones of each
/// other.		/// other.
class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {		class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {

▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
// Utility macro for defining a visit method for a given class. This method		// Utility macro for defining a visit method for a given class. This method
// calls back to the ConstStmtVisitor to visit all parent classes.		// calls back to the ConstStmtVisitor to visit all parent classes.
#define DEF_ADD_DATA(CLASS, CODE) \		#define DEF_ADD_DATA(CLASS, CODE) \
void Visit##CLASS(const CLASS *S) { \		void Visit##CLASS(const CLASS *S) { \
CODE; \		CODE; \
ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \		ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \
}		}

DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })		DEF_ADD_DATA(Stmt, {
		addData(S->getStmtClass());
		// This ensures that macro-generated code isn't considered identical to
		// non-macro-generated code.
		addData(getMacroStack(S->getLocStart(), Context));
		addData(getMacroStack(S->getLocEnd(), Context));
		})
DEF_ADD_DATA(Expr, { addData(S->getType()); })		DEF_ADD_DATA(Expr, { addData(S->getType()); })

//--- Builtin functionality ----------------------------------------------//		//--- Builtin functionality ----------------------------------------------//
DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })		DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })		DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })		DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
DEF_ADD_DATA(TypeTraitExpr, {		DEF_ADD_DATA(TypeTraitExpr, {
addData(S->getTrait());		addData(S->getTrait());
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines	class CloneSignatureGenerator {

CloneDetector &CD;		CloneDetector &CD;
ASTContext &Context;		ASTContext &Context;

/// \brief Generates CloneSignatures for all statements in the given statement		/// \brief Generates CloneSignatures for all statements in the given statement
/// tree and stores them in the CloneDetector.		/// tree and stores them in the CloneDetector.
///		///
/// \param S The root of the given statement tree.		/// \param S The root of the given statement tree.
		/// \param ParentMacroStack A string representing the macros that generated
		/// the parent statement or an empty string if no
		/// macros generated the parent statement.
		/// See getMacroStack() for generating such a string.
/// \return The CloneSignature of the root statement.		/// \return The CloneSignature of the root statement.
CloneDetector::CloneSignature generateSignatures(const Stmt *S) {		CloneDetector::CloneSignature
		generateSignatures(const Stmt *S, const std::string &ParentMacroStack) {
// Create an empty signature that will be filled in this method.		// Create an empty signature that will be filled in this method.
CloneDetector::CloneSignature Signature;		CloneDetector::CloneSignature Signature;

// Collect all relevant data from S and put it into the empty signature.		// Collect all relevant data from S and put it into the empty signature.
StmtDataCollector(S, Context, Signature.Data);		StmtDataCollector(S, Context, Signature.Data);

		// Look up what macros expanded into the current statement.
		std::string StartMacroStack = getMacroStack(S->getLocStart(), Context);
		std::string EndMacroStack = getMacroStack(S->getLocEnd(), Context);

		// First, check if ParentMacroStack is not empty which means we are currently
		// dealing with a parent statement which was expanded from a macro.
		// If this parent statement was expanded from the same macros as this
		// statement, we reduce the initial complexity of this statement to zero.
		// This causes that a group of statements that were generated by a single
		// macro expansion will only increase the total complexity by one.
		// Note: This is not the final complexity of this statement as we still
		// add the complexity of the child statements to the complexity value.
		if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
		EndMacroStack == ParentMacroStack)) {
		Signature.Complexity = 0;
		}

// Storage for the signatures of the direct child statements. This is only		// Storage for the signatures of the direct child statements. This is only
// needed if the current statement is a CompoundStmt.		// needed if the current statement is a CompoundStmt.
std::vector<CloneDetector::CloneSignature> ChildSignatures;		std::vector<CloneDetector::CloneSignature> ChildSignatures;
const CompoundStmt *CS = dyn_cast<const CompoundStmt>(S);		const CompoundStmt *CS = dyn_cast<const CompoundStmt>(S);

// The signature of a statement includes the signatures of its children.		// The signature of a statement includes the signatures of its children.
// Therefore we create the signatures for every child and add them to the		// Therefore we create the signatures for every child and add them to the
		v.g.vassilevUnsubmitted Done Reply Inline Actions What are complex macros? Could you clarify? v.g.vassilev: What are complex macros? Could you clarify?
// current signature.		// current signature.
for (const Stmt *Child : S->children()) {		for (const Stmt *Child : S->children()) {
// Some statements like 'if' can have nullptr children that we will skip.		// Some statements like 'if' can have nullptr children that we will skip.
if (!Child)		if (!Child)
continue;		continue;
		omtcyfzUnsubmitted Done Reply Inline Actions Do I understand correctly that a code generated by a macro doesn't affect "complexity" at all then? TEST_F(QueryParserTest, Complete) { std::vector<llvm::LineEditor::Completion> Comps = QueryParser::complete("", 0, QS); ASSERT_EQ(6u, Comps.size()); EXPECT_EQ("help ", Comps[0].TypedText); EXPECT_EQ("help", Comps[0].DisplayText); EXPECT_EQ("let ", Comps[1].TypedText); EXPECT_EQ("let", Comps[1].DisplayText); EXPECT_EQ("match ", Comps[2].TypedText); EXPECT_EQ("match", Comps[2].DisplayText); EXPECT_EQ("set ", Comps[3].TypedText); EXPECT_EQ("set", Comps[3].DisplayText); EXPECT_EQ("unlet ", Comps[4].TypedText); EXPECT_EQ("unlet", Comps[4].DisplayText); EXPECT_EQ("quit", Comps[5].DisplayText); EXPECT_EQ("quit ", Comps[5].TypedText); Comps = QueryParser::complete("set o", 5, QS); ASSERT_EQ(1u, Comps.size()); EXPECT_EQ("utput ", Comps[0].TypedText); EXPECT_EQ("output", Comps[0].DisplayText); Comps = QueryParser::complete("match while", 11, QS); ASSERT_EQ(1u, Comps.size()); EXPECT_EQ("Stmt(", Comps[0].TypedText); EXPECT_EQ("Matcher<Stmt> whileStmt(Matcher<WhileStmt>...)", Comps[0].DisplayText); } This is an actual piece of code from `extra/unittests/clang-query/QueryParserTest.cpp`. Yes, it is a test, but it still is a nice example of how many macros can be found in code (especially if we are talking about pure C or some weird C++). Thus, I think it is reasonable to treat macro invocation as a `1`-"complexity" node. omtcyfz: Do I understand correctly that a code generated by a macro doesn't affect "complexity" at all…
		NoQUnsubmitted Done Reply Inline Actions This "0" is not for the macro itself, but for the statements into which it expands. Macro itself is not a statement. If we put "1" here, it would produce a lot more complexity than you want. That said, it's a good idea to treat every macro as a "complexity-1" statement, just need to figure out how to implement that correctly :) Perhaps scan the source range of the sequence for how many different macro expansions are included, and add that number to complexity(?) NoQ: This "0" is not for the macro itself, but for the statements into which it expands. Macro…
		omtcyfzUnsubmitted Done Reply Inline Actions This "0" is not for the macro itself, but for the statements into which it expands. Macro itself is not a statement. If we put "1" here, it would produce a lot more complexity than you want. Sure, I understand that, this is why I didn't suggest putting `1` there. Perhaps scan the source range of the sequence for how many different macro expansions are included, and add that number to complexity(?) Yes, this is exactly the solution that would work. Since macros aren't in the AST we'd need to get through SourceRange anyway. omtcyfz: > This "0" is not for the macro itself, but for the statements into which it expands. Macro…
		omtcyfzUnsubmitted Done Reply Inline Actions Though, it has to be optimized in order to prevent parsing a SourceLocation multiple times. omtcyfz: Though, it has to be optimized in order to prevent parsing a SourceLocation multiple times.
		omtcyfzUnsubmitted Done Reply Inline Actions visiting each SourceLocation omtcyfz:* *visiting each SourceLocation
		NoQUnsubmitted Done Reply Inline Actions Yeah, as a rough approximation we could count macro expansions within the current statement's children... NoQ: Yeah, as a rough approximation we could count macro expansions within the current statement's…
		teemperorAuthorUnsubmitted Not Done Reply Inline Actions I'm now checking all expanded macros of the start/end locations. This should handle everything if I see that correctly (beside empty non-function macros which I marked as false-positives - not sure how we best handle them). teemperor: I'm now checking all expanded macros of the start/end locations. This should handle everything…

// Recursive call to create the signature of the child statement. This		// Recursive call to create the signature of the child statement. This
// will also create and store all clone groups in this child statement.		// will also create and store all clone groups in this child statement.
auto ChildSignature = generateSignatures(Child);		// We pass only the StartMacroStack along to keep things simple.
		auto ChildSignature = generateSignatures(Child, StartMacroStack);

// Add the collected data to the signature of the current statement.		// Add the collected data to the signature of the current statement.
Signature.add(ChildSignature);		Signature.add(ChildSignature);

// If the current statement is a CompoundStatement, we need to store the		// If the current statement is a CompoundStatement, we need to store the
// signature for the generation of the sub-sequences.		// signature for the generation of the sub-sequences.
if (CS)		if (CS)
ChildSignatures.push_back(ChildSignature);		ChildSignatures.push_back(ChildSignature);
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	void handleSubSequences(
}		}
}		}

public:		public:
explicit CloneSignatureGenerator(CloneDetector &CD, ASTContext &Context)		explicit CloneSignatureGenerator(CloneDetector &CD, ASTContext &Context)
: CD(CD), Context(Context) {}		: CD(CD), Context(Context) {}

/// \brief Generates signatures for all statements in the given function body.		/// \brief Generates signatures for all statements in the given function body.
void consumeCodeBody(const Stmt *S) { generateSignatures(S); }		void consumeCodeBody(const Stmt *S) { generateSignatures(S, ""); }
};		};
} // end anonymous namespace		} // end anonymous namespace

void CloneDetector::analyzeCodeBody(const Decl *D) {		void CloneDetector::analyzeCodeBody(const Decl *D) {
assert(D);		assert(D);
assert(D->hasBody());		assert(D->hasBody());
CloneSignatureGenerator Generator(*this, D->getASTContext());		CloneSignatureGenerator Generator(*this, D->getASTContext());
Generator.consumeCodeBody(D->getBody());		Generator.consumeCodeBody(D->getBody());
▲ Show 20 Lines • Show All 135 Lines • Show Last 20 Lines

test/Analysis/copypaste/macro-complexity.cpp

This file was added.

				// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:MinimumCloneComplexity=10 -verify %s

				// Tests that the complexity value of a macro expansion is about the same as
				// the complexity value of a normal function call and that the macro body doesn't
				// influence the complexity. See the CloneSignature class in CloneDetection.h
				// for more information about complexity values of clones.

				#define MACRO_FOO(a, b) a > b ? -a * a : -b * b;

				// First, manually apply MACRO_FOO and see if the code gets detected as a clone.
				// This confirms that with the current configuration the macro body would be
				// considered large enough to pass the MinimumCloneComplexity constraint.

				int manualMacro(int a, int b) { // expected-warning{{Detected code clone.}}
				return a > b ? -a * a : -b * b;
				}

				int manualMacroClone(int a, int b) { // expected-note{{Related code clone is here.}}
				return a > b ? -a * a : -b * b;
				}
				v.g.vassilevUnsubmitted Done Reply Inline Actions I am not sure I understand this comment. Could you reword? v.g.vassilev: I am not sure I understand this comment. Could you reword?
				NoQUnsubmitted Done Reply Inline Actions Without macros, the same code would constitute a complex clone. Wrapping code into macros reduces complexity of the code. This tests the test above. ^(tried out some reword-ings) NoQ: Without macros, the same code would constitute a complex clone. Wrapping code into macros…

				// Now we actually use the macro to generate the same AST as above. They
				// shouldn't be reported because the macros only slightly increase the complexity
				// value and the resulting code will never pass the MinimumCloneComplexity
				// constraint.

				int macro(int a, int b) {
				return MACRO_FOO(a, b);
				}

				int macroClone(int a, int b) {
				return MACRO_FOO(a, b);
				}

				// So far we only tested that macros increase the complexity by a lesser amount
				// than normal code. We also need to be sure this amount is not zero because
				// otherwise macro code would be 'invisible' for the CloneDetector.
				// This tests that it is possible to reach the minimum complexity
				// by only using macros. This is only possible if the complexity value is bigger
				// than zero.

				#define NEG(A) -(A)

				int nestedMacros() { // expected-warning{{Detected code clone.}}
				return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1))))))))));
				}

				int nestedMacrosClone() { // expected-note{{Related code clone is here.}}
				return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1))))))))));
				}

test/Analysis/copypaste/macros.cpp

This file was added.

				// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s

				// Tests that macros and non-macro clones aren't mixed into the same hash
				// group. This is currently necessary as all clones in a hash group need
				// to have the same complexity value. Macros have smaller complexity values
				// and need to be in their own hash group.

				int foo(int a) { // expected-warning{{Detected code clone.}}
				a = a + 1;
				a = a + 1 / 1;
				a = a + 1 + 1 + 1;
				a = a + 1 - 1 + 1 + 1;
				v.g.vassilevUnsubmitted Done Reply Inline Actions Wouldn't it be a good idea to have a fixit hint, saying "Did you mean to use ABS(a,b)". If the suggestion is applied, it would make the code more consistent, however it would encourage using preprocessor tricks (which is not always considered as good practice). v.g.vassilev: Wouldn't it be a good idea to have a fixit hint, saying "Did you mean to use ABS(a,b)". If the…
				teemperorAuthorUnsubmitted Not Done Reply Inline Actions I don't think detecting clones between macro definitions and normal code is easily possible. Doing the same for functions however is certainly possible (i.e. "did you mean to call `max(a, b)`?"). I added that to the open points list. teemperor: I don't think detecting clones between macro definitions and normal code is easily possible.
				a = a + 1 * 1 + 1 + 1 + 1;
				a = a + 1 / 1 + 1 + 1 + 1;
				return a;
				}

				int fooClone(int a) { // expected-note{{Related code clone is here.}}
				a = a + 1;
				a = a + 1 / 1;
				a = a + 1 + 1 + 1;
				a = a + 1 - 1 + 1 + 1;
				a = a + 1 * 1 + 1 + 1 + 1;
				a = a + 1 / 1 + 1 + 1 + 1;
				return a;
				}

				// Below is the same AST as above but this time generated with macros. The
				// clones below should land in their own hash group for the reasons given above.

				#define ASSIGN(T, V) T = T + V

				int macro(int a) { // expected-warning{{Detected code clone.}}
				ASSIGN(a, 1);
				ASSIGN(a, 1 / 1);
				ASSIGN(a, 1 + 1 + 1);
				ASSIGN(a, 1 - 1 + 1 + 1);
				ASSIGN(a, 1 * 1 + 1 + 1 + 1);
				ASSIGN(a, 1 / 1 + 1 + 1 + 1);
				return a;
				}

				int macroClone(int a) { // expected-note{{Related code clone is here.}}
				ASSIGN(a, 1);
				ASSIGN(a, 1 / 1);
				ASSIGN(a, 1 + 1 + 1);
				ASSIGN(a, 1 - 1 + 1 + 1);
				ASSIGN(a, 1 * 1 + 1 + 1 + 1);
				ASSIGN(a, 1 / 1 + 1 + 1 + 1);
				return a;
				}

This is an archive of the discontinued LLVM Phabricator instance.

[analyzer] Fixed the false-positives caused by macro generated code.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 68556

include/clang/Analysis/CloneDetection.h

lib/Analysis/CloneDetection.cpp

test/Analysis/copypaste/macro-complexity.cpp

test/Analysis/copypaste/macros.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[analyzer] Fixed the false-positives caused by macro generated code.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 68556

include/clang/Analysis/CloneDetection.h

lib/Analysis/CloneDetection.cpp

test/Analysis/copypaste/macro-complexity.cpp

test/Analysis/copypaste/macros.cpp

[analyzer] Fixed the false-positives caused by macro generated code.
ClosedPublic