Skip to content

Commit c5cc033

Browse files
committedApr 18, 2017
[clang-tidy] Add a clang-tidy check for possible inefficient vector operations
Summary: The "performance-inefficient-vector-operation" check finds vector oprations in for-loop statements which may cause multiple memory reallocations. This is the first version, only detects typical for-loop: ``` std::vector<int> v; for (int i = 0; i < n; ++i) { v.push_back(i); } // or for (int i = 0; i < v2.size(); ++i) { v.push_back(v2[i]); } ``` We can extend it to handle more cases like for-range loop in the future. Reviewers: alexfh, aaron.ballman Reviewed By: aaron.ballman Subscribers: zaks.anna, Eugene.Zelenko, mgorny, cfe-commits, djasper Differential Revision: https://reviews.llvm.org/D31757 llvm-svn: 300534
1 parent 7461972 commit c5cc033

8 files changed

+400
-1
lines changed
 

Diff for: ‎clang-tools-extra/clang-tidy/performance/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_clang_library(clangTidyPerformanceModule
55
ForRangeCopyCheck.cpp
66
ImplicitCastInLoopCheck.cpp
77
InefficientStringConcatenationCheck.cpp
8+
InefficientVectorOperationCheck.cpp
89
PerformanceTidyModule.cpp
910
TypePromotionInMathFnCheck.cpp
1011
UnnecessaryCopyInitialization.cpp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
//===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#include "InefficientVectorOperationCheck.h"
11+
#include "clang/AST/ASTContext.h"
12+
#include "clang/ASTMatchers/ASTMatchFinder.h"
13+
#include "clang/Lex/Lexer.h"
14+
#include "../utils/DeclRefExprUtils.h"
15+
16+
using namespace clang::ast_matchers;
17+
18+
namespace clang {
19+
namespace tidy {
20+
namespace performance {
21+
22+
namespace {
23+
24+
// Matcher names. Given the code:
25+
//
26+
// \code
27+
// void f() {
28+
// vector<T> v;
29+
// for (int i = 0; i < 10 + 1; ++i) {
30+
// v.push_back(i);
31+
// }
32+
// }
33+
// \endcode
34+
//
35+
// The matcher names are bound to following parts of the AST:
36+
// - LoopName: The entire for loop (as ForStmt).
37+
// - LoopParentName: The body of function f (as CompoundStmt).
38+
// - VectorVarDeclName: 'v' in (as VarDecl).
39+
// - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as
40+
// DeclStmt).
41+
// - PushBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr).
42+
// - LoopInitVarName: 'i' (as VarDecl).
43+
// - LoopEndExpr: '10+1' (as Expr).
44+
static const char LoopCounterName[] = "for_loop_counter";
45+
static const char LoopParentName[] = "loop_parent";
46+
static const char VectorVarDeclName[] = "vector_var_decl";
47+
static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt";
48+
static const char PushBackCallName[] = "push_back_call";
49+
50+
static const char LoopInitVarName[] = "loop_init_var";
51+
static const char LoopEndExprName[] = "loop_end_expr";
52+
53+
} // namespace
54+
55+
void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
56+
const auto VectorDecl = cxxRecordDecl(hasName("::std::vector"));
57+
const auto VectorDefaultConstructorCall = cxxConstructExpr(
58+
hasType(VectorDecl),
59+
hasDeclaration(cxxConstructorDecl(isDefaultConstructor())));
60+
const auto VectorVarDecl =
61+
varDecl(hasInitializer(VectorDefaultConstructorCall))
62+
.bind(VectorVarDeclName);
63+
const auto PushBackCallExpr =
64+
cxxMemberCallExpr(
65+
callee(cxxMethodDecl(hasName("push_back"))), on(hasType(VectorDecl)),
66+
onImplicitObjectArgument(declRefExpr(to(VectorVarDecl))))
67+
.bind(PushBackCallName);
68+
const auto PushBackCall =
69+
expr(anyOf(PushBackCallExpr, exprWithCleanups(has(PushBackCallExpr))));
70+
const auto VectorVarDefStmt =
71+
declStmt(hasSingleDecl(equalsBoundNode(VectorVarDeclName)))
72+
.bind(VectorVarDeclStmtName);
73+
74+
const auto LoopVarInit =
75+
declStmt(hasSingleDecl(varDecl(hasInitializer(integerLiteral(equals(0))))
76+
.bind(LoopInitVarName)));
77+
const auto RefersToLoopVar = ignoringParenImpCasts(
78+
declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName)))));
79+
80+
// Match counter-based for loops:
81+
// for (int i = 0; i < n; ++i) { v.push_back(...); }
82+
//
83+
// FIXME: Support more types of counter-based loops like decrement loops.
84+
Finder->addMatcher(
85+
forStmt(
86+
hasLoopInit(LoopVarInit),
87+
hasCondition(binaryOperator(
88+
hasOperatorName("<"), hasLHS(RefersToLoopVar),
89+
hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar))))
90+
.bind(LoopEndExprName)))),
91+
hasIncrement(unaryOperator(hasOperatorName("++"),
92+
hasUnaryOperand(RefersToLoopVar))),
93+
hasBody(anyOf(compoundStmt(statementCountIs(1), has(PushBackCall)),
94+
PushBackCall)),
95+
hasParent(compoundStmt(has(VectorVarDefStmt)).bind(LoopParentName)))
96+
.bind(LoopCounterName),
97+
this);
98+
}
99+
100+
void InefficientVectorOperationCheck::check(
101+
const MatchFinder::MatchResult &Result) {
102+
if (Result.Context->getDiagnostics().hasUncompilableErrorOccurred())
103+
return;
104+
105+
const SourceManager &SM = *Result.SourceManager;
106+
const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName);
107+
const auto *PushBackCall =
108+
Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackCallName);
109+
const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName);
110+
const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName);
111+
const auto *VectorVarDecl =
112+
Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName);
113+
114+
llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVectorVarRefs =
115+
utils::decl_ref_expr::allDeclRefExprs(*VectorVarDecl, *LoopParent,
116+
*Result.Context);
117+
for (const auto *Ref : AllVectorVarRefs) {
118+
// Skip cases where there are usages (defined as DeclRefExpr that refers to
119+
// "v") of vector variable `v` before the for loop. We consider these usages
120+
// are operations causing memory preallocation (e.g. "v.resize(n)",
121+
// "v.reserve(n)").
122+
//
123+
// FIXME: make it more intelligent to identify the pre-allocating operations
124+
// before the for loop.
125+
if (SM.isBeforeInTranslationUnit(Ref->getLocation(),
126+
ForLoop->getLocStart())) {
127+
return;
128+
}
129+
}
130+
131+
llvm::StringRef LoopEndSource = clang::Lexer::getSourceText(
132+
CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM,
133+
clang::LangOptions());
134+
llvm::StringRef VectorVarName = clang::Lexer::getSourceText(
135+
CharSourceRange::getTokenRange(
136+
PushBackCall->getImplicitObjectArgument()->getSourceRange()),
137+
SM, clang::LangOptions());
138+
std::string ReserveStmt =
139+
(VectorVarName + ".reserve(" + LoopEndSource + ");\n").str();
140+
141+
diag(PushBackCall->getLocStart(),
142+
"'push_back' is called inside a loop; "
143+
"consider pre-allocating the vector capacity before the loop.")
144+
<< FixItHint::CreateInsertion(ForLoop->getLocStart(), ReserveStmt);
145+
}
146+
147+
} // namespace performance
148+
} // namespace tidy
149+
} // namespace clang
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===--- InefficientVectorOperationCheck.h - clang-tidy----------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H
11+
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H
12+
13+
#include "../ClangTidy.h"
14+
15+
namespace clang {
16+
namespace tidy {
17+
namespace performance {
18+
19+
/// Finds possible inefficient `std::vector` operations (e.g. `push_back`) in
20+
/// for loops that may cause unnecessary memory reallocations.
21+
///
22+
/// For the user-facing documentation see:
23+
/// http://clang.llvm.org/extra/clang-tidy/checks/performance-inefficient-vector-operation.html
24+
class InefficientVectorOperationCheck : public ClangTidyCheck {
25+
public:
26+
InefficientVectorOperationCheck(StringRef Name, ClangTidyContext *Context)
27+
: ClangTidyCheck(Name, Context) {}
28+
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
29+
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
30+
};
31+
32+
} // namespace performance
33+
} // namespace tidy
34+
} // namespace clang
35+
36+
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H

Diff for: ‎clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "ForRangeCopyCheck.h"
1515
#include "ImplicitCastInLoopCheck.h"
1616
#include "InefficientStringConcatenationCheck.h"
17+
#include "InefficientVectorOperationCheck.h"
1718
#include "TypePromotionInMathFnCheck.h"
1819
#include "UnnecessaryCopyInitialization.h"
1920
#include "UnnecessaryValueParamCheck.h"
@@ -33,6 +34,8 @@ class PerformanceModule : public ClangTidyModule {
3334
"performance-implicit-cast-in-loop");
3435
CheckFactories.registerCheck<InefficientStringConcatenationCheck>(
3536
"performance-inefficient-string-concatenation");
37+
CheckFactories.registerCheck<InefficientVectorOperationCheck>(
38+
"performance-inefficient-vector-operation");
3639
CheckFactories.registerCheck<TypePromotionInMathFnCheck>(
3740
"performance-type-promotion-in-math-fn");
3841
CheckFactories.registerCheck<UnnecessaryCopyInitialization>(

Diff for: ‎clang-tools-extra/docs/ReleaseNotes.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Improvements to clang-tidy
6262

6363
Finds modification of the ``std`` or ``posix`` namespace.
6464

65-
- Improved `cppcoreguidelines-no-malloc
65+
- Improved `cppcoreguidelines-no-malloc
6666
<http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines-no-malloc.html>`_ check
6767

6868
Allow custom memory management functions to be considered as well.
@@ -95,6 +95,12 @@ Improvements to clang-tidy
9595
- Support clang-formatting of the code around applied fixes (``-format-style``
9696
command-line option).
9797

98+
- New `performance-inefficient-vector-operation
99+
<http://clang.llvm.org/extra/clang-tidy/checks/performance-inefficient-vector-operation.html>`_ check
100+
101+
Finds possible inefficient vector operations in for loops that may cause
102+
unnecessary memory reallocations.
103+
98104
Improvements to include-fixer
99105
-----------------------------
100106

Diff for: ‎clang-tools-extra/docs/clang-tidy/checks/list.rst

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ Clang-Tidy Checks
142142
performance-for-range-copy
143143
performance-implicit-cast-in-loop
144144
performance-inefficient-string-concatenation
145+
performance-inefficient-vector-operation
145146
performance-type-promotion-in-math-fn
146147
performance-unnecessary-copy-initialization
147148
performance-unnecessary-value-param
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
.. title:: clang-tidy - performance-inefficient-vector-operation
2+
3+
performance-inefficient-vector-operation
4+
========================================
5+
6+
Finds possible inefficient `std::vector` operations (e.g. `push_back`) that may
7+
cause unnecessary memory reallocations.
8+
9+
Currently, the check only detects a typical counter-based for loop with a single
10+
statement in it, see below:
11+
12+
.. code-block:: c++
13+
14+
std::vector<int> v;
15+
for (int i = 0; i < n; ++i) {
16+
v.push_back(n);
17+
// This will trigger the warning since the push_back may cause multiple
18+
// memory reallocations in v. This can be avoid by inserting a 'reserve(n)'
19+
// statment before the for statment.
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// RUN: %check_clang_tidy %s performance-inefficient-vector-operation %t -- -format-style=llvm --
2+
3+
typedef int size_t;
4+
5+
namespace std {
6+
template <class T>
7+
class vector {
8+
public:
9+
typedef T* iterator;
10+
typedef const T* const_iterator;
11+
typedef T& reference;
12+
typedef const T& const_reference;
13+
typedef size_t size_type;
14+
15+
explicit vector();
16+
explicit vector(size_type n);
17+
18+
void push_back(const T& val);
19+
void reserve(size_t n);
20+
void resize(size_t n);
21+
22+
size_t size();
23+
const_reference operator[] (size_type) const;
24+
reference operator[] (size_type);
25+
};
26+
} // namespace std
27+
28+
void f(std::vector<int>& t) {
29+
{
30+
std::vector<int> v;
31+
// CHECK-FIXES: v.reserve(10);
32+
for (int i = 0; i < 10; ++i)
33+
v.push_back(i);
34+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called inside a loop; consider pre-allocating the vector capacity before the loop
35+
}
36+
{
37+
std::vector<int> v;
38+
// CHECK-FIXES: v.reserve(10);
39+
for (int i = 0; i < 10; i++)
40+
v.push_back(i);
41+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
42+
}
43+
{
44+
std::vector<int> v;
45+
// CHECK-FIXES: v.reserve(10);
46+
for (int i = 0; i < 10; ++i)
47+
v.push_back(0);
48+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
49+
}
50+
{
51+
std::vector<int> v;
52+
// CHECK-FIXES: v.reserve(5);
53+
for (int i = 0; i < 5; ++i) {
54+
v.push_back(i);
55+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
56+
}
57+
// CHECK-FIXES-NOT: v.reserve(10);
58+
for (int i = 0; i < 10; ++i) {
59+
// No fix for this loop as we encounter the prior loops.
60+
v.push_back(i);
61+
}
62+
}
63+
{
64+
std::vector<int> v;
65+
std::vector<int> v2;
66+
v2.reserve(3);
67+
// CHECK-FIXES: v.reserve(10);
68+
for (int i = 0; i < 10; ++i)
69+
v.push_back(i);
70+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
71+
}
72+
{
73+
std::vector<int> v;
74+
// CHECK-FIXES: v.reserve(t.size());
75+
for (size_t i = 0; i < t.size(); ++i) {
76+
v.push_back(t[i]);
77+
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
78+
}
79+
}
80+
{
81+
std::vector<int> v;
82+
// CHECK-FIXES: v.reserve(t.size() - 1);
83+
for (size_t i = 0; i < t.size() - 1; ++i) {
84+
v.push_back(t[i]);
85+
} // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: 'push_back' is called
86+
}
87+
88+
// ---- Non-fixed Cases ----
89+
{
90+
std::vector<int> v;
91+
v.reserve(20);
92+
// CHECK-FIXES-NOT: v.reserve(10);
93+
// There is a "reserve" call already.
94+
for (int i = 0; i < 10; ++i) {
95+
v.push_back(i);
96+
}
97+
}
98+
{
99+
std::vector<int> v;
100+
v.reserve(5);
101+
// CHECK-FIXES-NOT: v.reserve(10);
102+
// There is a "reserve" call already.
103+
for (int i = 0; i < 10; ++i) {
104+
v.push_back(i);
105+
}
106+
}
107+
{
108+
std::vector<int> v;
109+
v.resize(5);
110+
// CHECK-FIXES-NOT: v.reserve(10);
111+
// There is a ref usage of v before the loop.
112+
for (int i = 0; i < 10; ++i) {
113+
v.push_back(i);
114+
}
115+
}
116+
{
117+
std::vector<int> v;
118+
v.push_back(0);
119+
// CHECK-FIXES-NOT: v.reserve(10);
120+
// There is a ref usage of v before the loop.
121+
for (int i = 0; i < 10; ++i) {
122+
v.push_back(i);
123+
}
124+
}
125+
{
126+
std::vector<int> v;
127+
f(v);
128+
// CHECK-FIXES-NOT: v.reserve(10);
129+
// There is a ref usage of v before the loop.
130+
for (int i = 0; i < 10; ++i) {
131+
v.push_back(i);
132+
}
133+
}
134+
{
135+
std::vector<int> v(20);
136+
// CHECK-FIXES-NOT: v.reserve(10);
137+
// v is not constructed with default constructor.
138+
for (int i = 0; i < 10; ++i) {
139+
v.push_back(i);
140+
}
141+
}
142+
{
143+
std::vector<int> v;
144+
// CHECK-FIXES-NOT: v.reserve(10);
145+
// For-loop is not started with 0.
146+
for (int i = 1; i < 10; ++i) {
147+
v.push_back(i);
148+
}
149+
}
150+
{
151+
std::vector<int> v;
152+
// CHECK-FIXES-NOT: v.reserve(t.size());
153+
// v isn't referenced in for-loop body.
154+
for (size_t i = 0; i < t.size(); ++i) {
155+
t.push_back(i);
156+
}
157+
}
158+
{
159+
std::vector<int> v;
160+
int k;
161+
// CHECK-FIXES-NOT: v.reserve(10);
162+
// For-loop isn't a fixable loop.
163+
for (size_t i = 0; k < 10; ++i) {
164+
v.push_back(t[i]);
165+
}
166+
}
167+
{
168+
std::vector<int> v;
169+
// CHECK-FIXES-NOT: v.reserve(i + 1);
170+
// The loop end expression refers to the loop variable i.
171+
for (int i = 0; i < i + 1; i++)
172+
v.push_back(i);
173+
}
174+
{
175+
std::vector<int> v;
176+
int k;
177+
// CHECK-FIXES-NOT: v.reserve(10);
178+
// For-loop isn't a fixable loop.
179+
for (size_t i = 0; i < 10; ++k) {
180+
v.push_back(t[i]);
181+
}
182+
}
183+
}

0 commit comments

Comments
 (0)
Please sign in to comment.