Index: clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h =================================================================== --- clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h +++ clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h @@ -18,6 +18,9 @@ /// Finds possible inefficient `std::vector` operations (e.g. `push_back`) in /// for loops that may cause unnecessary memory reallocations. /// +/// When EnableProto, also finds calls that add element to protobuf repeated +/// field without calling Reserve() first. +/// /// For the user-facing documentation see: /// http://clang.llvm.org/extra/clang-tidy/checks/performance-inefficient-vector-operation.html class InefficientVectorOperationCheck : public ClangTidyCheck { @@ -28,7 +31,14 @@ void storeOptions(ClangTidyOptions::OptionMap &Opts) override; private: + void AddMatcher(const ast_matchers::DeclarationMatcher &TargetRecordDecl, + StringRef VarDeclName, StringRef VarDeclStmtName, + const ast_matchers::DeclarationMatcher &AppendMethodDecl, + StringRef AppendCallName, ast_matchers::MatchFinder *Finder); const std::vector VectorLikeClasses; + + // If true, also check inefficient operations for proto repeated fields. + bool EnableProto; }; } // namespace performance Index: clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp =================================================================== --- clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp +++ clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp @@ -29,26 +29,39 @@ // for (int i = 0; i < 10 + 1; ++i) { // v.push_back(i); // } +// +// SomeProto p; +// for (int i = 0; i < 10 + 1; ++i) { +// p.add_xxx(i); +// } // } // \endcode // // The matcher names are bound to following parts of the AST: // - LoopCounterName: The entire for loop (as ForStmt). // - LoopParentName: The body of function f (as CompoundStmt). -// - VectorVarDeclName: 'v' in (as VarDecl). +// - VectorVarDeclName: 'v' (as VarDecl). // - VectorVarDeclStmatName: The entire 'std::vector v;' statement (as // DeclStmt). // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr). // - LoopInitVarName: 'i' (as VarDecl). // - LoopEndExpr: '10+1' (as Expr). +// If EnableProto, the proto related names are bound to the following parts: +// - ProtoRecordDeclName: 'SomeProto'. +// - ProtoVarDeclName: 'p' (as VarDecl). +// - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt). +// - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr). static const char LoopCounterName[] = "for_loop_counter"; static const char LoopParentName[] = "loop_parent"; static const char VectorVarDeclName[] = "vector_var_decl"; static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt"; static const char PushBackOrEmplaceBackCallName[] = "append_call"; +static const char ProtoRecordDeclName[] = "proto_record_decl"; +static const char ProtoVarDeclName[] = "proto_var_decl"; +static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt"; +static const char ProtoAddFieldCallName[] = "proto_add_field"; static const char LoopInitVarName[] = "loop_init_var"; static const char LoopEndExprName[] = "loop_end_expr"; - static const char RangeLoopName[] = "for_range_loop"; ast_matchers::internal::Matcher supportedContainerTypesMatcher() { @@ -63,34 +76,34 @@ StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), VectorLikeClasses(utils::options::parseStringList( - Options.get("VectorLikeClasses", "::std::vector"))) {} + Options.get("VectorLikeClasses", "::std::vector"))), + EnableProto(Options.getLocalOrGlobal("EnableProto", false)) {} void InefficientVectorOperationCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "VectorLikeClasses", utils::options::serializeStringList(VectorLikeClasses)); + Options.store(Opts, "EnableProto", static_cast(EnableProto)); } -void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { - const auto VectorDecl = cxxRecordDecl(hasAnyName(SmallVector( - VectorLikeClasses.begin(), VectorLikeClasses.end()))); - const auto VectorDefaultConstructorCall = cxxConstructExpr( - hasType(VectorDecl), +void InefficientVectorOperationCheck::AddMatcher( + const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, + StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl, + StringRef AppendCallName, MatchFinder *Finder) { + const auto DefaultConstructorCall = cxxConstructExpr( + hasType(TargetRecordDecl), hasDeclaration(cxxConstructorDecl(isDefaultConstructor()))); - const auto VectorVarDecl = - varDecl(hasInitializer(VectorDefaultConstructorCall)) - .bind(VectorVarDeclName); - const auto VectorAppendCallExpr = - cxxMemberCallExpr( - callee(cxxMethodDecl(hasAnyName("push_back", "emplace_back"))), - on(hasType(VectorDecl)), - onImplicitObjectArgument(declRefExpr(to(VectorVarDecl)))) - .bind(PushBackOrEmplaceBackCallName); - const auto VectorAppendCall = expr(ignoringImplicit(VectorAppendCallExpr)); - const auto VectorVarDefStmt = - declStmt(hasSingleDecl(equalsBoundNode(VectorVarDeclName))) - .bind(VectorVarDeclStmtName); + const auto TargetVarDecl = + varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName); + const auto TargetVarDefStmt = declStmt(hasSingleDecl(equalsBoundNode(VarDeclName))) + .bind(VarDeclStmtName); + const auto AppendCallExpr = + cxxMemberCallExpr( + callee(AppendMethodDecl), on(hasType(TargetRecordDecl)), + onImplicitObjectArgument(declRefExpr(to(TargetVarDecl)))) + .bind(AppendCallName); + const auto AppendCall = expr(ignoringImplicit(AppendCallExpr)); const auto LoopVarInit = declStmt(hasSingleDecl(varDecl(hasInitializer(integerLiteral(equals(0)))) .bind(LoopInitVarName))); @@ -99,14 +112,16 @@ // Matchers for the loop whose body has only 1 push_back/emplace_back calling // statement. - const auto HasInterestingLoopBody = - hasBody(anyOf(compoundStmt(statementCountIs(1), has(VectorAppendCall)), - VectorAppendCall)); + const auto HasInterestingLoopBody = hasBody( + anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall)); const auto InInterestingCompoundStmt = - hasParent(compoundStmt(has(VectorVarDefStmt)).bind(LoopParentName)); + hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName)); // Match counter-based for loops: - // for (int i = 0; i < n; ++i) { v.push_back(...); } + // for (int i = 0; i < n; ++i) { + // v.push_back(...); + // // Or: proto.add_xxx(...); + // } // // FIXME: Support more types of counter-based loops like decrement loops. Finder->addMatcher( @@ -123,7 +138,10 @@ this); // Match for-range loops: - // for (const auto& E : data) { v.push_back(...); } + // for (const auto& E : data) { + // v.push_back(...); + // // Or: proto.add_xxx(...); + // } // // FIXME: Support more complex range-expressions. Finder->addMatcher( @@ -134,6 +152,23 @@ this); } +void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { + const auto VectorDecl = cxxRecordDecl(hasAnyName(SmallVector( + VectorLikeClasses.begin(), VectorLikeClasses.end()))); + const auto AppendMethodDecl = + cxxMethodDecl(hasAnyName("push_back", "emplace_back")); + AddMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName, + AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder); + + if (EnableProto) { + const auto ProtoDecl = cxxRecordDecl(isDerivedFrom("::proto2::MessageLite")) + .bind(ProtoRecordDeclName); + const auto AddFieldMethodDecl = cxxMethodDecl(matchesName("::add_")); + AddMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName, + AddFieldMethodDecl, ProtoAddFieldCallName, Finder); + } +} + void InefficientVectorOperationCheck::check( const MatchFinder::MatchResult &Result) { auto* Context = Result.Context; @@ -148,64 +183,114 @@ Result.Nodes.getNodeAs(RangeLoopName); const auto *VectorAppendCall = Result.Nodes.getNodeAs(PushBackOrEmplaceBackCallName); + const auto *ProtoRecordDecl = + Result.Nodes.getNodeAs(ProtoRecordDeclName); + const auto *ProtoVarDecl = Result.Nodes.getNodeAs(ProtoVarDeclName); + const auto *ProtoAddFieldCall = + Result.Nodes.getNodeAs(ProtoAddFieldCallName); const auto *LoopEndExpr = Result.Nodes.getNodeAs(LoopEndExprName); const auto *LoopParent = Result.Nodes.getNodeAs(LoopParentName); + const CXXMemberCallExpr *AppendCall = VectorAppendCall; + if (!AppendCall) + AppendCall = ProtoAddFieldCall; + if (!AppendCall) + return; + const Stmt *LoopStmt = ForLoop; if (!LoopStmt) LoopStmt = RangeLoop; - llvm::SmallPtrSet AllVectorVarRefs = - utils::decl_ref_expr::allDeclRefExprs(*VectorVarDecl, *LoopParent, - *Context); - for (const auto *Ref : AllVectorVarRefs) { - // Skip cases where there are usages (defined as DeclRefExpr that refers to - // "v") of vector variable `v` before the for loop. We consider these usages - // are operations causing memory preallocation (e.g. "v.resize(n)", - // "v.reserve(n)"). - // - // FIXME: make it more intelligent to identify the pre-allocating operations - // before the for loop. - if (SM.isBeforeInTranslationUnit(Ref->getLocation(), - LoopStmt->getBeginLoc())) { - return; + std::string PartialReserveStmt; + if (VectorAppendCall != nullptr) { + llvm::SmallPtrSet AllVectorVarRefs = + utils::decl_ref_expr::allDeclRefExprs(*VectorVarDecl, *LoopParent, + *Context); + for (const auto *Ref : AllVectorVarRefs) { + // Skip cases where there are usages (defined as DeclRefExpr that refers + // to "v") of vector variable `v` before the for loop. We consider these + // usages are operations causing memory preallocation (e.g. "v.resize(n)", + // "v.reserve(n)"). + // + // FIXME: make it more intelligent to identify the pre-allocating + // operations before the for loop. + if (SM.isBeforeInTranslationUnit(Ref->getLocation(), + LoopStmt->getBeginLoc())) { + return; + } + } + PartialReserveStmt = ".reserve"; + } else { + std::string AddFieldName = + ProtoAddFieldCall->getMethodDecl()->getNameAsString(); + std::string MutableFieldName = + ("mutable_" + ProtoAddFieldCall->getMethodDecl()->getName().substr(4)) + .str(); + { + auto Matches = match(cxxRecordDecl(hasMethod(hasName(MutableFieldName))), + *ProtoRecordDecl, *Context); + if (Matches.empty()) { + // There is no method with name "mutable_xxx". + return; + } } + + { + const auto MutableFieldCallExpr = cxxMemberCallExpr( + callee(cxxMethodDecl(hasAnyName({MutableFieldName, AddFieldName}))), + onImplicitObjectArgument( + declRefExpr(to(varDecl(equalsNode(ProtoVarDecl)))))); + auto matches = + match(findAll(MutableFieldCallExpr.bind("maybe_reallocation")), + *LoopParent, *Context); + for (const auto &match : matches) { + const auto call = + match.getNodeAs("maybe_reallocation"); + // Skip cases where "mutable_xxx" or "add_xxx" is called before the + // loop. + if (SM.isBeforeInTranslationUnit(call->getEndLoc(), + LoopStmt->getBeginLoc())) { + return; + } + } + } + PartialReserveStmt = "." + MutableFieldName + + "()->Reserve"; // e.g., ".mutable_xxx()->Reserve" } - llvm::StringRef VectorVarName = Lexer::getSourceText( + llvm::StringRef VarName = Lexer::getSourceText( CharSourceRange::getTokenRange( - VectorAppendCall->getImplicitObjectArgument()->getSourceRange()), + AppendCall->getImplicitObjectArgument()->getSourceRange()), SM, Context->getLangOpts()); - std::string ReserveStmt; + std::string ReserveSize; // Handle for-range loop cases. if (RangeLoop) { // Get the range-expression in a for-range statement represented as // `for (range-declarator: range-expression)`. - StringRef RangeInitExpName = Lexer::getSourceText( - CharSourceRange::getTokenRange( - RangeLoop->getRangeInit()->getSourceRange()), - SM, Context->getLangOpts()); - - ReserveStmt = - (VectorVarName + ".reserve(" + RangeInitExpName + ".size()" + ");\n") - .str(); + StringRef RangeInitExpName = + Lexer::getSourceText(CharSourceRange::getTokenRange( + RangeLoop->getRangeInit()->getSourceRange()), + SM, Context->getLangOpts()); + ReserveSize = (RangeInitExpName + ".size()").str(); } else if (ForLoop) { // Handle counter-based loop cases. StringRef LoopEndSource = Lexer::getSourceText( CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM, Context->getLangOpts()); - ReserveStmt = (VectorVarName + ".reserve(" + LoopEndSource + ");\n").str(); + ReserveSize = LoopEndSource; } - auto Diag = - diag(VectorAppendCall->getBeginLoc(), - "%0 is called inside a loop; " - "consider pre-allocating the vector capacity before the loop") - << VectorAppendCall->getMethodDecl()->getDeclName(); - - if (!ReserveStmt.empty()) + auto Diag = diag(AppendCall->getBeginLoc(), + "%0 is called inside a loop; consider pre-allocating the %1 " + "capacity before the loop") + << AppendCall->getMethodDecl()->getDeclName() + << (VectorAppendCall != nullptr ? "vector" : "repeated field"); + if (!ReserveSize.empty()) { + std::string ReserveStmt = + (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str(); Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt); + } } } // namespace performance Index: clang-tools-extra/docs/clang-tidy/checks/performance-inefficient-vector-operation.rst =================================================================== --- clang-tools-extra/docs/clang-tidy/checks/performance-inefficient-vector-operation.rst +++ clang-tools-extra/docs/clang-tidy/checks/performance-inefficient-vector-operation.rst @@ -6,6 +6,10 @@ Finds possible inefficient ``std::vector`` operations (e.g. ``push_back``, ``emplace_back``) that may cause unnecessary memory reallocations. +It can also find calls that add element to protobuf repeated field in a loop +without calling Reserve() before the loop. Calling Reserve() first can avoid +unnecessary memory reallocations. + Currently, the check only detects following kinds of loops with a single statement body: @@ -21,6 +25,13 @@ // statement before the for statement. } + SomeProto p; + for (int i = 0; i < n; ++i) { + p.add_xxx(n); + // This will trigger the warning since the add_xxx may cause multiple memory + // relloacations. This can be avoid by inserting a + // 'p.mutable_xxx().Reserve(n)' statement before the for statement. + } * For-range loops like ``for (range-declaration : range_expression)``, the type of ``range_expression`` can be ``std::vector``, ``std::array``, @@ -47,3 +58,8 @@ Semicolon-separated list of names of vector-like classes. By default only ``::std::vector`` is considered. + +.. option:: EnableProto + When non-zero, the check will also warn on inefficient operations for proto + repeated fields. Otherwise, the check only warns on inefficient vector + operations. Default is `0`. Index: clang-tools-extra/test/clang-tidy/performance-inefficient-vector-operation.cpp =================================================================== --- clang-tools-extra/test/clang-tidy/performance-inefficient-vector-operation.cpp +++ clang-tools-extra/test/clang-tidy/performance-inefficient-vector-operation.cpp @@ -1,4 +1,7 @@ -// RUN: %check_clang_tidy %s performance-inefficient-vector-operation %t -- -format-style=llvm +// RUN: %check_clang_tidy %s performance-inefficient-vector-operation %t -- \ +// RUN: -format-style=llvm \ +// RUN: -config='{CheckOptions: \ +// RUN: [{key: performance-inefficient-vector-operation.EnableProto, value: 1}]}' namespace std { @@ -62,6 +65,27 @@ int Op(int); +namespace proto2 { +class MessageLite {}; +class Message : public MessageLite {}; +} // namespace proto2 + +class FooProto : public proto2::Message { +public: + int *add_x(); + void add_x(int x); + void mutable_x(); + void mutable_y(); +}; + +class BarProto : public proto2::Message { +public: + int *add_x(); + void add_x(int x); + void mutable_x(); + void mutable_y(); +}; + void f(std::vector& t) { { std::vector v0; @@ -162,6 +186,24 @@ } } + { + FooProto foo; + // CHECK-FIXES: foo.mutable_x()->Reserve(5); + for (int i = 0; i < 5; i++) { + foo.add_x(i); + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'add_x' is called inside a loop; consider pre-allocating the repeated field capacity before the loop + } + } + { + FooProto foo; + foo.mutable_y(); + // CHECK-FIXES: foo.mutable_x()->Reserve(5); + for (int i = 0; i < 5; i++) { + foo.add_x(i); + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'add_x' is called inside a loop + } + } + // ---- Non-fixed Cases ---- { std::vector z0; @@ -274,4 +316,39 @@ z12.push_back(e); } } + + { + FooProto foo; + foo.mutable_x(); + // CHECK-FIXES-NOT: foo.mutable_x->Reserve(5); + for (int i = 0; i < 5; i++) { + foo.add_x(i); + } + } + { + FooProto foo; + // CHECK-FIXES-NOT: foo.mutable_x->Reserve(5); + for (int i = 0; i < 5; i++) { + foo.add_x(i); + foo.add_x(i); + } + } + { + FooProto foo; + // CHECK-FIXES-NOT: foo.mutable_x->Reserve(5); + foo.add_x(-1); + for (int i = 0; i < 5; i++) { + foo.add_x(i); + } + } + { + FooProto foo; + BarProto bar; + bar.mutable_x(); + // CHECK-FIXES-NOT: foo.mutable_x->Reserve(5); + for (int i = 0; i < 5; i++) { + foo.add_x(); + bar.add_x(); + } + } }