Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(remove-cstr-calls) add_subdirectory(tool-template) +add_subdirectory(loop-convert) # Add the common testsuite after all the tools. add_subdirectory(test) Index: loop-convert/CMakeLists.txt =================================================================== --- /dev/null +++ loop-convert/CMakeLists.txt @@ -0,0 +1,20 @@ +set(LLVM_LINK_COMPONENTS support) +set(LLVM_USED_LIBS clangTooling clangBasic clangAST) + +add_clang_executable(loop-convert + LoopConvert.cpp + LoopActions.cpp + LoopActions.h + LoopMatchers.cpp + LoopMatchers.h + StmtAncestor.cpp + StmtAncestor.h + VariableNaming.cpp + VariableNaming.h + ) + +target_link_libraries(loop-convert + clangTooling + clangBasic + clangASTMatchers + ) Index: loop-convert/LoopActions.h =================================================================== --- /dev/null +++ loop-convert/LoopActions.h @@ -0,0 +1,108 @@ +//===-- loop-convert/LoopActions.h - C++11 For loop migration ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares matchers and callbacks for use in migrating C++ for loops. 
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOPACTIONS_H_
+#define _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOPACTIONS_H_
+
+#include "StmtAncestor.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+namespace clang {
+namespace loop_migrate {
+
+struct Usage;
+class Confidence;
+// The main computational result of ForLoopIndexUseVisitor: a list of Usages.
+typedef llvm::SmallVector<Usage, 8> UsageResult;
+
+/// \brief The level of safety to require of transformations.
+enum TranslationConfidenceKind {
+  TCK_Risky,
+  TCK_Reasonable,
+  TCK_Safe
+};
+
+enum LoopFixerKind {
+  LFK_Array,
+  LFK_Iterator,
+  LFK_PseudoArray
+};
+
+/// \brief The callback to be used for loop migration matchers.
+///
+/// The callback does extra checking not possible in matchers, and attempts to
+/// convert the for loop, if possible.
+class LoopFixer : public ast_matchers::MatchFinder::MatchCallback {
+ public:
+  LoopFixer(StmtAncestorASTVisitor *ParentFinder,
+            tooling::Replacements *Replace,
+            StmtGeneratedVarNameMap *GeneratedDecls,
+            ReplacedVarsMap *ReplacedVarRanges,
+            unsigned *AcceptedChanges, unsigned *DeferredChanges,
+            unsigned *RejectedChanges, bool CountOnly,
+            TranslationConfidenceKind RequiredConfidenceLevel,
+            LoopFixerKind FixerKind) :
+  ParentFinder(ParentFinder), Replace(Replace),
+  GeneratedDecls(GeneratedDecls), ReplacedVarRanges(ReplacedVarRanges),
+  AcceptedChanges(AcceptedChanges), DeferredChanges(DeferredChanges),
+  RejectedChanges(RejectedChanges), CountOnly(CountOnly),
+  RequiredConfidenceLevel(RequiredConfidenceLevel), FixerKind(FixerKind) { }
+  virtual void run(const ast_matchers::MatchFinder::MatchResult &Result);
+
+ private:
+  StmtAncestorASTVisitor *ParentFinder;
+  tooling::Replacements *Replace;
+  StmtGeneratedVarNameMap *GeneratedDecls;
+  ReplacedVarsMap *ReplacedVarRanges;
+  unsigned *AcceptedChanges;
+  unsigned *DeferredChanges;
+  unsigned *RejectedChanges;
+  bool CountOnly;
+  TranslationConfidenceKind RequiredConfidenceLevel;
+  LoopFixerKind FixerKind;
+
+  /// \brief Computes the changes needed to convert a given for loop, and
+  /// applies them if this->CountOnly is false.
+  void doConversion(ASTContext *Context,
+                    const VarDecl *IndexVar,
+                    const VarDecl *MaybeContainer,
+                    StringRef ContainerString,
+                    const UsageResult &Usages,
+                    const DeclStmt *AliasDecl, const ForStmt *TheLoop,
+                    bool ContainerNeedsDereference);
+
+  /// \brief Given a loop header that would be convertible, discover all usages
+  /// of the index variable and convert the loop if possible.
+  void findAndVerifyUsages(ASTContext *Context,
+                           const VarDecl *LoopVar,
+                           const VarDecl *EndVar,
+                           const Expr *ContainerExpr,
+                           const Expr *BoundExpr,
+                           bool ContainerNeedsDereference,
+                           const ForStmt *TheLoop,
+                           Confidence ConfidenceLevel);
+
+  /// \brief Determine if the change should be deferred or rejected, returning
+  /// text which refers to the container iterated over if the change should
+  /// proceed, and an empty string otherwise.
+  StringRef checkDeferralsAndRejections(ASTContext *Context,
+                                        const Expr *ContainerExpr,
+                                        Confidence ConfidenceLevel,
+                                        const ForStmt *TheLoop);
+};
+
+} // namespace loop_migrate
+} // namespace clang
+#endif // _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOPACTIONS_H_
Index: loop-convert/LoopActions.cpp
===================================================================
--- /dev/null
+++ loop-convert/LoopActions.cpp
@@ -0,0 +1,991 @@
+//===-- loop-convert/LoopActions.cpp - C++11 For loop migration -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines matchers and callbacks for use in migrating C++ for loops.
+//
+//===----------------------------------------------------------------------===//
+#include "LoopActions.h"
+#include "LoopMatchers.h"
+#include "VariableNaming.h"
+
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace loop_migrate {
+
+using namespace clang::ast_matchers;
+using namespace clang::tooling;
+
+/// \brief The information needed to describe a valid convertible usage
+/// of an array index or iterator.
+struct Usage {
+  const Expr *E;      ///< The expression recorded as a usage.
+  bool IsArrow;       ///< True when the usage is written with `->`.
+  SourceRange Range;  ///< The source range replaced by the new loop variable.
+
+  explicit Usage(const Expr *E)
+      : E(E), IsArrow(false), Range(E->getSourceRange()) { }
+  Usage(const Expr *E, bool IsArrow, SourceRange Range)
+      : E(E), IsArrow(IsArrow), Range(Range) { }
+};
+
+/// \brief A class to encapsulate lowering of the tool's confidence level.
+class Confidence {
+ public:
+  /// \brief Initialize the confidence level to Level; it can afterwards only
+  /// be lowered via lowerTo() or overwritten via resetTo().
+  explicit Confidence(TranslationConfidenceKind Level) :
+    CurrentLevel(Level) {}
+
+  /// \brief Lower the internal confidence level to Level, but do not raise it.
+  void lowerTo(TranslationConfidenceKind Level) {
+    CurrentLevel = std::min(Level, CurrentLevel);
+  }
+
+  /// \brief Return the internal confidence level.
+  TranslationConfidenceKind get() const { return CurrentLevel; }
+
+  /// \brief Set the confidence level unconditionally.
+  void resetTo(TranslationConfidenceKind Level) { CurrentLevel = Level; }
+
+ private:
+  TranslationConfidenceKind CurrentLevel;
+};
+
+/// \brief Discover usages of expressions consisting of index or iterator
+/// access.
+///
+/// Given an index variable, recursively crawls a for loop to discover if the
+/// index variable is used in a way consistent with range-based for loop access.
+class ForLoopIndexUseVisitor
+  : public RecursiveASTVisitor<ForLoopIndexUseVisitor> {
+ public:
+  ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar,
+                         const VarDecl *EndVar, const Expr *ContainerExpr,
+                         const Expr *ArrayBoundExpr,
+                         bool ContainerNeedsDereference) :
+    Context(Context), IndexVar(IndexVar), EndVar(EndVar),
+    ContainerExpr(ContainerExpr), ArrayBoundExpr(ArrayBoundExpr),
+    ContainerNeedsDereference(ContainerNeedsDereference),
+    OnlyUsedAsIndex(true), AliasDecl(NULL), ConfidenceLevel(TCK_Safe) {
+    if (ContainerExpr) {
+      // Seed the dependent-expression list with the container itself.
+      // addComponent() already strips parens/implicit casts and profiles the
+      // node, so no separate Profile() call is needed here.
+      addComponent(ContainerExpr);
+    }
+  }
+
+  /// \brief Finds all uses of IndexVar in Body, placing all usages in Usages,
+  /// and returns true if IndexVar was only used in a way consistent with a
+  /// range-based for loop and the indexed container is known.
+  ///
+  /// The general strategy is to reject any DeclRefExprs referencing IndexVar,
+  /// with the exception of certain acceptable patterns.
+  /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an
+  /// ArraySubscriptExpression. Iterator-based loops may dereference
+  /// IndexVar or call methods through operator-> (builtin or overloaded).
+  /// Array-like containers may use IndexVar as a parameter to the at() member
+  /// function and in overloaded operator[].
+  bool findAndVerifyUsages(const Stmt *Body) {
+    TraverseStmt(const_cast<Stmt *>(Body));
+    return OnlyUsedAsIndex && ContainerExpr;
+  }
+
+  /// \brief Add a set of components that we should consider relevant to the
+  /// container.
+  void addComponents(const ComponentVector &Components) {
+    // FIXME: add sort(on ID)+unique to avoid extra work.
+    for (ComponentVector::const_iterator I = Components.begin(),
+                                         E = Components.end(); I != E; ++I)
+      addComponent(*I);
+  }
+
+  /// \brief Accessor for Usages.
+  const UsageResult &getUsages() const { return Usages; }
+
+  /// \brief Get the container indexed by IndexVar, if any.
+  const Expr *getContainerIndexed() const {
+    return ContainerExpr;
+  }
+
+  /// \brief Returns the statement declaring the variable created as an alias
+  /// for the loop element, if any.
+  const DeclStmt *getAliasDecl() const { return AliasDecl; }
+
+  /// \brief Accessor for ConfidenceLevel.
+  TranslationConfidenceKind getConfidenceLevel() const {
+    return ConfidenceLevel.get();
+  }
+
+ private:
+  /// Typedef used in CRTP functions.
+  typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase;
+  friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>;
+
+  /// Overridden methods for RecursiveASTVisitor's traversal.
+  bool TraverseArraySubscriptExpr(ArraySubscriptExpr *ASE);
+  bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall);
+  bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall);
+  bool TraverseMemberExpr(MemberExpr *Member);
+  bool TraverseUnaryDeref(UnaryOperator *Uop);
+  bool VisitDeclRefExpr(DeclRefExpr *DRE);
+  bool VisitDeclStmt(DeclStmt *DS);
+
+  /// \brief Add an expression to the list of expressions on which the container
+  /// expression depends.
+  void addComponent(const Expr *E) {
+    llvm::FoldingSetNodeID ID;
+    const Expr *Node = E->IgnoreParenImpCasts();
+    Node->Profile(ID, *Context, true);
+    DependentExprs.push_back(std::make_pair(Node, ID));
+  }
+
+  // Input member variables:
+  ASTContext *Context;
+  /// The index variable's VarDecl.
+  const VarDecl *IndexVar;
+  /// The loop's 'end' variable, which cannot be mentioned at all.
+  const VarDecl *EndVar;
+  /// The Expr which refers to the container.
+  const Expr *ContainerExpr;
+  /// The Expr which refers to the terminating condition for array-based loops.
+  const Expr *ArrayBoundExpr;
+  bool ContainerNeedsDereference;
+
+  // Output member variables:
+  /// A container which holds all usages of IndexVar as the index of
+  /// ArraySubscriptExpressions.
+  UsageResult Usages;
+  bool OnlyUsedAsIndex;
+  /// The DeclStmt for an alias to the container element.
+  const DeclStmt *AliasDecl;
+  Confidence ConfidenceLevel;
+  /// \brief A list of expressions on which ContainerExpr depends.
+  ///
+  /// If any of these expressions are encountered outside of an acceptable usage
+  /// of the loop element, lower our confidence level.
+  llvm::SmallVector<
+      std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> DependentExprs;
+};
+
+/// \brief Obtain the original source code text from a SourceRange.
+static StringRef getStringFromRange(SourceManager &SourceMgr,
+                                    const LangOptions &LangOpts,
+                                    SourceRange Range) {
+  if (SourceMgr.getFileID(Range.getBegin()) !=
+      SourceMgr.getFileID(Range.getEnd()))
+    return StringRef(); // Empty: the range spans more than one file.
+
+  CharSourceRange SourceChars(Range, true);
+  return Lexer::getSourceText(SourceChars, SourceMgr, LangOpts);
+}
+
+/// \brief Returns the DeclRefExpr represented by E, or NULL if there isn't one.
+static const DeclRefExpr *getDeclRef(const Expr *E) {
+  return dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
+}
+
+/// \brief If the given expression is actually a DeclRefExpr, find and return
+/// the underlying VarDecl; otherwise, return NULL.
+static const VarDecl *getReferencedVariable(const Expr *E) {
+  if (const DeclRefExpr *DRE = getDeclRef(E))
+    return dyn_cast<VarDecl>(DRE->getDecl());
+  return NULL;
+}
+
+/// \brief Returns true when the given expression is a member expression
+/// whose base is `this` (implicitly or not).
+static bool isDirectMemberExpr(const Expr *E) {
+  if (const MemberExpr *Member = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts()))
+    return isa<CXXThisExpr>(Member->getBase()->IgnoreParenImpCasts());
+  return false;
+}
+
+/// \brief Returns true when two ValueDecls are the same variable.
+static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) {
+  return First && Second &&
+         First->getCanonicalDecl() == Second->getCanonicalDecl();
+}
+
+/// \brief Determines if an expression is a declaration reference to a
+/// particular variable.
+static bool exprReferencesVariable(const ValueDecl *Target, const Expr *E) {
+  if (!Target || !E)
+    return false;
+  const DeclRefExpr *DRE = getDeclRef(E);
+  return DRE && areSameVariable(Target, DRE->getDecl());
+}
+
+/// \brief Returns true when two Exprs have identical canonical profiles.
+static bool areSameExpr(ASTContext* Context, const Expr *First,
+                        const Expr *Second) {
+  if (!First || !Second)
+    return false;
+
+  llvm::FoldingSetNodeID FirstID, SecondID;
+  First->Profile(FirstID, *Context, true);
+  Second->Profile(SecondID, *Context, true);
+  return FirstID == SecondID;
+}
+
+/// \brief Look through conversion/copy constructors to find the explicit
+/// initialization expression, returning it if found (NULL otherwise).
+///
+/// The main idea is that given
+///   vector<int> v;
+/// we consider either of these initializations
+///   vector<int>::iterator it = v.begin();
+///   vector<int>::iterator it(v.begin());
+/// and retrieve `v.begin()` as the expression used to initialize `it` but do
+/// not include
+///   vector<int>::iterator it;
+///   vector<int>::iterator it(v.begin(), 0); // if this constructor existed
+/// as being initialized from `v.begin()`
+static const Expr *digThroughConstructors(const Expr *E) {
+  if (!E)
+    return NULL;
+  E = E->IgnoreParenImpCasts();
+  if (const CXXConstructExpr *ConstructExpr = dyn_cast<CXXConstructExpr>(E)) {
+    // The initial constructor must take exactly one parameter, but base class
+    // and deferred constructors can take more.
+    if (ConstructExpr->getNumArgs() != 1 ||
+        ConstructExpr->getConstructionKind() != CXXConstructExpr::CK_Complete)
+      return NULL;
+    E = ConstructExpr->getArg(0);
+    if (const MaterializeTemporaryExpr *MTE =
+        dyn_cast<MaterializeTemporaryExpr>(E))
+      E = MTE->GetTemporaryExpr();
+    return digThroughConstructors(E);
+  }
+  return E;
+}
+
+/// \brief If the expression is a dereference or call to operator*(), return the
+/// operand. Otherwise, return NULL.
+static const Expr *getDereferenceOperand(const Expr *E) {
+  if (const UnaryOperator *Uop = dyn_cast<UnaryOperator>(E))
+    return Uop->getOpcode() == UO_Deref ? Uop->getSubExpr() : NULL;
+
+  if (const CXXOperatorCallExpr *OpCall = dyn_cast<CXXOperatorCallExpr>(E))
+    return OpCall->getOperator() == OO_Star && OpCall->getNumArgs() == 1 ?
+        OpCall->getArg(0) : NULL;
+
+  return NULL;
+}
+
+/// \brief Returns true when the Container contains an Expr equivalent to E.
+template<typename ContainerT>
+static bool containsExpr(ASTContext *Context, const ContainerT *Container,
+                         const Expr *E) {
+  llvm::FoldingSetNodeID ID;
+  E->Profile(ID, *Context, true);
+  for (typename ContainerT::const_iterator I = Container->begin(),
+       End = Container->end(); I != End; ++I)
+    if (ID == I->second)
+      return true;
+  return false;
+}
+
+/// \brief Returns true when the index expression is a declaration reference to
+/// IndexVar.
+///
+/// If the index variable is `index`, this function returns true on
+///    arrayExpression[index];
+///    containerExpression[index];
+/// but not
+///    containerExpression[notIndex];
+static bool isIndexInSubscriptExpr(const Expr *IndexExpr,
+                                   const VarDecl *IndexVar) {
+  const DeclRefExpr *Idx = getDeclRef(IndexExpr);
+  return Idx && Idx->getType()->isIntegerType()
+      && areSameVariable(IndexVar, Idx->getDecl());
+}
+
+/// \brief Returns true when the index expression is a declaration reference to
+/// IndexVar and Obj is the same expression as SourceExpr after all parens and
+/// implicit casts are stripped off.
+///
+/// If PermitDeref is true, Obj may also be a dereference of SourceExpr
+/// (overloaded or builtin operator*).
+///
+/// This function is intended for array-like containers, as it makes sure that
+/// both the container and the index match.
+/// If the loop has index variable `index` and iterates over `container`, then
+/// isIndexInSubscriptExpr returns true for
+/// \code
+///   container[index]
+///   container.at(index)
+///   container->at(index)
+/// \endcode
+/// but not for
+/// \code
+///   container[notIndex]
+///   notContainer[index]
+/// \endcode
+/// If PermitDeref is true, then isIndexInSubscriptExpr additionally returns
+/// true on these expressions:
+/// \code
+///   (*container)[index]
+///   (*container).at(index)
+/// \endcode
+static bool isIndexInSubscriptExpr(ASTContext *Context, const Expr *IndexExpr,
+                                   const VarDecl *IndexVar, const Expr *Obj,
+                                   const Expr *SourceExpr, bool PermitDeref) {
+  if (!SourceExpr || !Obj || !isIndexInSubscriptExpr(IndexExpr, IndexVar))
+    return false;
+
+  if (areSameExpr(Context, SourceExpr->IgnoreParenImpCasts(),
+                  Obj->IgnoreParenImpCasts()))
+    return true;
+
+  if (const Expr *InnerObj = getDereferenceOperand(Obj->IgnoreParenImpCasts()))
+    if (PermitDeref && areSameExpr(Context, SourceExpr->IgnoreParenImpCasts(),
+                                   InnerObj->IgnoreParenImpCasts()))
+      return true;
+
+  return false;
+}
+
+/// \brief Returns true when OpCall is a one-argument operator*() call whose
+/// operand is a reference to IndexVar.
+///
+/// For example, if the index variable is `index`, returns true for
+///   *index
+/// but not
+///   index
+///   *notIndex
+static bool isDereferenceOfOpCall(const CXXOperatorCallExpr *OpCall,
+                                  const VarDecl *IndexVar) {
+  return OpCall->getOperator() == OO_Star && OpCall->getNumArgs() == 1 &&
+      exprReferencesVariable(IndexVar, OpCall->getArg(0));
+}
+
+/// \brief Returns true when Uop is a dereference of IndexVar.
+///
+/// For example, if the index variable is `index`, returns true for
+///   *index
+/// but not
+///   index
+///   *notIndex
+static bool isDereferenceOfUop(const UnaryOperator *Uop,
+                               const VarDecl *IndexVar) {
+  return Uop->getOpcode() == UO_Deref &&
+      exprReferencesVariable(IndexVar, Uop->getSubExpr());
+}
+
+/// \brief Determines whether the given Decl defines a variable initialized to
+/// the loop object.
+///
+/// This is intended to find cases such as
+/// \code
+///   for (int i = 0; i < arraySize(arr); ++i) {
+///     T t = arr[i];
+///     // use t, do not use i
+///   }
+/// \endcode
+/// and
+/// \code
+///   for (iterator i = container.begin(), e = container.end(); i != e; ++i) {
+///     T t = *i;
+///     // use t, do not use i
+///   }
+/// \endcode
+static bool isAliasDecl(const Decl *TheDecl, const VarDecl *IndexVar) {
+  const VarDecl *VDecl = dyn_cast<VarDecl>(TheDecl);
+  if (!VDecl)
+    return false;
+  if (!VDecl->hasInit())
+    return false;
+  const Expr *Init =
+      digThroughConstructors(VDecl->getInit()->IgnoreParenImpCasts());
+  if (!Init)
+    return false;
+
+  switch (Init->getStmtClass()) {
+  case Stmt::ArraySubscriptExprClass: {
+    const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(Init);
+    // We don't really care which array is used here. We check to make sure
+    // it was the correct one later, since the AST will traverse it next.
+    return isIndexInSubscriptExpr(ASE->getIdx(), IndexVar);
+  }
+
+  case Stmt::UnaryOperatorClass:
+    return isDereferenceOfUop(cast<UnaryOperator>(Init), IndexVar);
+
+  case Stmt::CXXOperatorCallExprClass: {
+    const CXXOperatorCallExpr *OpCall = cast<CXXOperatorCallExpr>(Init);
+    if (OpCall->getOperator() == OO_Star)
+      return isDereferenceOfOpCall(OpCall, IndexVar);
+    break;
+  }
+
+  default:
+    break;
+  }
+  return false;
+}
+
+/// \brief Determines whether the bound of a for loop condition expression is
+/// the same as the statically computable size of ArrayType.
+///
+/// Given
+/// \code
+///   const int N = 5;
+///   int arr[N];
+/// \endcode
+/// This is intended to permit
+/// \code
+///   for (int i = 0; i < N; ++i) { /* use arr[i] */ }
+///   for (int i = 0; i < arraysize(arr); ++i) { /* use arr[i] */ }
+/// \endcode
+static bool arrayMatchesBoundExpr(ASTContext *Context,
+                                  const QualType &ArrayType,
+                                  const Expr *ConditionExpr) {
+  if (!ConditionExpr || ConditionExpr->isValueDependent())
+    return false;
+  const ConstantArrayType *CAT = Context->getAsConstantArrayType(ArrayType);
+  if (!CAT)
+    return false;
+  llvm::APSInt ConditionSize;
+  if (!ConditionExpr->isIntegerConstantExpr(ConditionSize, *Context))
+    return false;
+  llvm::APSInt ArraySize(CAT->getSize());
+  return llvm::APSInt::isSameValue(ConditionSize, ArraySize);
+}
+
+/// \brief If the unary operator is a dereference of IndexVar, include it
+/// as a valid usage and prune the traversal.
+///
+/// For example, if container.begin() and container.end() both return pointers
+/// to int, this makes sure that the initialization for `k` is not counted as an
+/// unconvertible use of the iterator `i`.
+/// \code
+///   for (int *i = container.begin(), *e = container.end(); i != e; ++i) {
+///     int k = *i + 2;
+///   }
+/// \endcode
+bool ForLoopIndexUseVisitor::TraverseUnaryDeref(UnaryOperator *Uop) {
+  // If we dereference an iterator that's actually a pointer, record the usage
+  // and return without visiting the operand, which VisitDeclRefExpr rejects.
+  if (isDereferenceOfUop(Uop, IndexVar)) {
+    Usages.push_back(Usage(Uop));
+    return true;
+  }
+
+  return VisitorBase::TraverseUnaryOperator(Uop);
+}
+
+/// \brief If the member expression is operator-> (overloaded or not) on
+/// IndexVar, include it as a valid usage and prune the traversal.
+///
+/// For example, given
+/// \code
+///   struct Foo { int bar(); int x; };
+///   vector<Foo> v;
+/// \endcode
+/// the following uses will be considered convertible:
+/// \code
+///   for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) {
+///     int b = i->bar();
+///     int k = i->x + 1;
+///   }
+/// \endcode
+/// though
+/// \code
+///   for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) {
+///     int k = i.insert(1);
+///   }
+///   for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) {
+///     int b = e->bar();
+///   }
+/// \endcode
+/// will not.
+bool ForLoopIndexUseVisitor::TraverseMemberExpr(MemberExpr *Member) {
+  const Expr *Base = Member->getBase();
+  const DeclRefExpr *Obj = getDeclRef(Base);
+  const Expr *ResultExpr = Member;
+  QualType ExprType;
+  if (const CXXOperatorCallExpr *Call =
+      dyn_cast<CXXOperatorCallExpr>(Base->IgnoreParenImpCasts())) {
+    // If operator->() is a MemberExpr containing a CXXOperatorCallExpr, then
+    // the MemberExpr does not have the expression we want. We therefore catch
+    // that instance here.
+    // For example, if vector<Foo>::iterator defines operator->(), then the
+    // example `i->bar()` at the top of this function is a CXXMemberCallExpr
+    // referring to `i->` as the member function called. We want just `i`, so
+    // we take the argument to operator->() as the base object.
+    if (Call->getOperator() == OO_Arrow) {
+      assert(Call->getNumArgs() == 1 &&
+             "Operator-> takes more than one argument");
+      Obj = getDeclRef(Call->getArg(0));
+      ResultExpr = Obj;
+      ExprType = Call->getCallReturnType();
+    }
+  }
+
+  if (Member->isArrow() && Obj && exprReferencesVariable(IndexVar, Obj)) {
+    if (ExprType.isNull())
+      ExprType = Obj->getType();
+
+    assert(ExprType->isPointerType() && "Operator-> returned non-pointer type");
+    // FIXME: This works around not having the location of the arrow operator.
+    // Consider adding OperatorLoc to MemberExpr?
+    SourceLocation ArrowLoc =
+        Lexer::getLocForEndOfToken(Base->getExprLoc(), 0,
+                                   Context->getSourceManager(),
+                                   Context->getLangOpts());
+    // If something complicated is happening (the lexer could not produce a
+    // valid location past the base token), give up on making this work.
+    if (!ArrowLoc.isInvalid()) {
+      Usages.push_back(Usage(ResultExpr, /*IsArrow=*/true,
+                             SourceRange(Base->getExprLoc(), ArrowLoc)));
+      return true;
+    }
+  }
+  return TraverseStmt(Member->getBase());
+}
+
+/// \brief If a member function call is the at() accessor on the container with
+/// IndexVar as the single argument, include it as a valid usage and prune
+/// the traversal.
+///
+/// Member calls on other objects will not be permitted.
+/// Calls on the iterator object are not permitted, unless done through
+/// operator->(). The one exception is allowing vector::at() for pseudoarrays.
+bool ForLoopIndexUseVisitor::TraverseCXXMemberCallExpr(
+    CXXMemberCallExpr *MemberCall) {
+  MemberExpr *Member =
+      dyn_cast<MemberExpr>(MemberCall->getCallee()->IgnoreParenImpCasts());
+  if (!Member)
+    return VisitorBase::TraverseCXXMemberCallExpr(MemberCall);
+  // We specifically allow an accessor named "at" to let STL in, though
+  // this is restricted to pseudo-arrays by requiring a single, integer
+  // argument.
+  const IdentifierInfo *Ident = Member->getMemberDecl()->getIdentifier();
+  if (Ident && Ident->isStr("at") && MemberCall->getNumArgs() == 1) {
+    if (isIndexInSubscriptExpr(Context, MemberCall->getArg(0), IndexVar,
+                               Member->getBase(), ContainerExpr,
+                               ContainerNeedsDereference)) {
+      Usages.push_back(Usage(MemberCall));
+      return true;
+    }
+  }
+
+  if (containsExpr(Context, &DependentExprs, Member->getBase()))
+    ConfidenceLevel.lowerTo(TCK_Risky);
+
+  return VisitorBase::TraverseCXXMemberCallExpr(MemberCall);
+}
+
+/// \brief If an overloaded operator call is a dereference of IndexVar or
+/// a subscript of the container with IndexVar as the single argument,
+/// include it as a valid usage and prune the traversal.
+///
+/// For example, given
+/// \code
+///   struct Foo { int bar(); int x; };
+///   vector<Foo> v;
+///   void f(Foo);
+/// \endcode
+/// the following uses will be considered convertible:
+/// \code
+///   for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) {
+///     f(*i);
+///   }
+///   for (int i = 0; i < v.size(); ++i) {
+///     int k = v[i].x + 1;
+///   }
+/// \endcode
+bool ForLoopIndexUseVisitor::TraverseCXXOperatorCallExpr(
+    CXXOperatorCallExpr *OpCall) {
+  switch (OpCall->getOperator()) {
+  case OO_Star:
+    if (isDereferenceOfOpCall(OpCall, IndexVar)) {
+      Usages.push_back(Usage(OpCall));
+      return true;
+    }
+    break;
+
+  case OO_Subscript:
+    if (OpCall->getNumArgs() != 2)
+      break;
+    if (isIndexInSubscriptExpr(Context, OpCall->getArg(1), IndexVar,
+                               OpCall->getArg(0), ContainerExpr,
+                               ContainerNeedsDereference)) {
+      Usages.push_back(Usage(OpCall));
+      return true;
+    }
+    break;
+
+  default:
+    break;
+  }
+  return VisitorBase::TraverseCXXOperatorCallExpr(OpCall);
+}
+
+/// \brief If we encounter an array with IndexVar as the index of an
+/// ArraySubscriptExpr, note it as a consistent usage and prune the
+/// AST traversal.
+///
+/// For example, given
+/// \code
+///   const int N = 5;
+///   int arr[N];
+/// \endcode
+/// This is intended to permit
+/// \code
+///   for (int i = 0; i < N; ++i) { /* use arr[i] */ }
+/// \endcode
+/// but not
+/// \code
+///   for (int i = 0; i < N; ++i) { /* use notArr[i] */ }
+/// \endcode
+/// and further checking needs to be done later to ensure that exactly one array
+/// is referenced.
+bool ForLoopIndexUseVisitor::TraverseArraySubscriptExpr(
+    ArraySubscriptExpr *ASE) {
+  Expr *Arr = ASE->getBase();
+  if (!isIndexInSubscriptExpr(ASE->getIdx(), IndexVar))
+    return VisitorBase::TraverseArraySubscriptExpr(ASE);
+
+  if ((ContainerExpr && !areSameExpr(Context, Arr->IgnoreParenImpCasts(),
+                                     ContainerExpr->IgnoreParenImpCasts()))
+      || !arrayMatchesBoundExpr(Context, Arr->IgnoreImpCasts()->getType(),
+                                ArrayBoundExpr)) {
+    // If we have already discovered the array being indexed and this isn't it,
+    // or this array's size doesn't match the bound, the loop is unconvertible.
+    OnlyUsedAsIndex = false;
+    return VisitorBase::TraverseArraySubscriptExpr(ASE);
+  }
+
+  if (!ContainerExpr)
+    ContainerExpr = Arr;
+
+  Usages.push_back(Usage(ASE));
+  return true;
+}
+
+/// \brief If we encounter a reference to IndexVar in an unpruned branch of the
+/// traversal, mark this loop as unconvertible.
+///
+/// This implements the whitelist for convertible loops: any usages of IndexVar
+/// not explicitly considered convertible by this traversal will be caught by
+/// this function.
+///
+/// Additionally, if the container expression is more complex than just a
+/// DeclRefExpr, and some part of it appears elsewhere in the loop, lower
+/// our confidence in the transformation.
+///
+/// For example, these are not permitted:
+/// \code
+///   for (int i = 0; i < N; ++i) {  printf("arr[%d] = %d", i, arr[i]); }
+///   for (vector<int>::iterator i = container.begin(), e = container.end();
+///        i != e; ++i)
+///     i.insert(0);
+///   for (vector<int>::iterator i = container.begin(), e = container.end();
+///        i != e; ++i)
+///     printf("%d", *e); // the end variable may not be mentioned at all
+///   for (vector<int>::iterator i = container.begin(), e = container.end();
+///        i != e; ++i)
+///     if (i + 1 != e)
+///       printf("%d", *i);
+/// \endcode
+///
+/// And these will lower the confidence level to risky:
+/// \code
+///    int arr[10][20];
+///    int l = 5;
+///    for (int j = 0; j < 20; ++j)
+///      int k = arr[l][j] + l; // using l outside arr[l] is considered risky
+///    for (int i = 0; i < obj.getVector().size(); ++i)
+///      obj.foo(10); // using `obj` is considered risky
+/// \endcode
+bool ForLoopIndexUseVisitor::VisitDeclRefExpr(DeclRefExpr *DRE) {
+  const ValueDecl *TheDecl = DRE->getDecl();
+  if (areSameVariable(IndexVar, TheDecl) || areSameVariable(EndVar, TheDecl))
+    OnlyUsedAsIndex = false;
+  if (containsExpr(Context, &DependentExprs, DRE))
+    ConfidenceLevel.lowerTo(TCK_Risky);
+  return true;
+}
+
+/// \brief If we find that another variable is created just to refer to the loop
+/// element, note it for reuse as the loop variable.
+///
+/// See the comments for isAliasDecl.
+bool ForLoopIndexUseVisitor::VisitDeclStmt(DeclStmt *DS) {
+  if (!AliasDecl && DS->isSingleDecl() &&
+      isAliasDecl(DS->getSingleDecl(), IndexVar))
+    AliasDecl = DS;
+  return true;
+}
+
+/// \brief Apply the source transformations necessary to migrate the loop!
+void LoopFixer::doConversion(ASTContext *Context,
+                             const VarDecl *IndexVar,
+                             const VarDecl *MaybeContainer,
+                             StringRef ContainerString,
+                             const UsageResult &Usages,
+                             const DeclStmt *AliasDecl, const ForStmt *TheLoop,
+                             bool ContainerNeedsDereference) {
+  std::string VarName;
+
+  if (Usages.size() == 1 && AliasDecl) {
+    const VarDecl *AliasVar = cast<VarDecl>(AliasDecl->getSingleDecl());
+    VarName = AliasVar->getName().str();
+    // We keep along the entire DeclStmt to keep the correct range here.
+    const SourceRange &ReplaceRange = AliasDecl->getSourceRange();
+    if (!CountOnly)
+      Replace->insert(
+          Replacement(Context->getSourceManager(),
+                      CharSourceRange::getTokenRange(ReplaceRange), ""));
+    // No further replacements are made to the loop, since the iterator or index
+    // was used exactly once - in the initialization of AliasVar.
+  } else {
+    VariableNamer Namer(GeneratedDecls, &ParentFinder->getStmtToParentStmtMap(),
+                        TheLoop, IndexVar, MaybeContainer);
+    VarName = Namer.createIndexName();
+    // First, replace all usages of the array subscript expression with our new
+    // variable.
+    for (UsageResult::const_iterator I = Usages.begin(), E = Usages.end();
+         I != E; ++I) {
+      std::string ReplaceText = I->IsArrow ? VarName + "." : VarName;
+      ReplacedVarRanges->insert(std::make_pair(TheLoop, IndexVar));
+      if (!CountOnly)
+        Replace->insert(
+            Replacement(Context->getSourceManager(),
+                        CharSourceRange::getTokenRange(I->Range),
+                        ReplaceText));
+    }
+  }
+
+  // Now, we need to construct the new range expression.
+  SourceRange ParenRange(TheLoop->getLParenLoc(), TheLoop->getRParenLoc());
+
+  QualType AutoRefType =
+      Context->getLValueReferenceType(Context->getAutoDeductType());
+
+  std::string MaybeDereference = ContainerNeedsDereference ? "*" : "";
+  std::string TypeString = AutoRefType.getAsString();
+  std::string Range = ("(" + TypeString + " " + VarName + " : "
+                           + MaybeDereference + ContainerString + ")").str();
+  if (!CountOnly)
+    Replace->insert(Replacement(Context->getSourceManager(),
+                                CharSourceRange::getTokenRange(ParenRange),
+                                Range));
+  GeneratedDecls->insert(std::make_pair(TheLoop, VarName));
+}
+
+/// \brief Determine whether Init appears to be initializing an iterator.
+///
+/// If it is, returns the object whose begin() or end() method is called, and
+/// the output parameter IsArrow is set to indicate whether the initialization
+/// is called via . or ->.
+static const Expr *getContainerFromBeginEndCall(const Expr* Init, bool IsBegin,
+                                                bool *IsArrow) {
+  // FIXME: Maybe allow declaration/initialization outside of the for loop?
+  const CXXMemberCallExpr *TheCall =
+      dyn_cast_or_null<CXXMemberCallExpr>(digThroughConstructors(Init));
+  if (!TheCall || TheCall->getNumArgs() != 0)
+      return NULL;
+
+  const MemberExpr *Member = dyn_cast<MemberExpr>(TheCall->getCallee());
+  if (!Member)
+    return NULL;
+  const std::string Name = Member->getMemberDecl()->getName();
+  const std::string TargetName = IsBegin ? "begin" : "end";
+  if (Name != TargetName)
+    return NULL;
+
+  const Expr *SourceExpr = Member->getBase();
+  if (!SourceExpr)
+    return NULL;
+
+  *IsArrow = Member->isArrow();
+  return SourceExpr;
+}
+
+/// \brief Determines the container whose begin() and end() functions are called
+/// for an iterator-based loop.
+///
+/// BeginExpr must be a member call to a function named "begin()", and EndExpr
+/// must be a member call to "end()" on the same container expression.
+static const Expr *findContainer(ASTContext *Context, const Expr *BeginExpr,
+                                 const Expr *EndExpr,
+                                 bool *ContainerNeedsDereference) {
+  // Now that we know the loop variable and test expression, make sure they are
+  // valid.
+  bool BeginIsArrow = false;
+  bool EndIsArrow = false;
+  const Expr *BeginContainerExpr =
+      getContainerFromBeginEndCall(BeginExpr, /*IsBegin=*/true, &BeginIsArrow);
+  if (!BeginContainerExpr)
+      return NULL;
+
+  const Expr *EndContainerExpr =
+      getContainerFromBeginEndCall(EndExpr, /*IsBegin=*/false, &EndIsArrow);
+  // Disallow loops that try evil things like this (note the dot and arrow):
+  //  for (IteratorType It = Obj.begin(), E = Obj->end(); It != E; ++It) { }
+  if (!EndContainerExpr || BeginIsArrow != EndIsArrow ||
+      !areSameExpr(Context, EndContainerExpr, BeginContainerExpr))
+    return NULL;
+
+  *ContainerNeedsDereference = BeginIsArrow;
+  return BeginContainerExpr;
+}
+
+StringRef LoopFixer::checkDeferralsAndRejections(ASTContext *Context,
+                                                 const Expr *ContainerExpr,
+                                                 Confidence ConfidenceLevel,
+                                                 const ForStmt *TheLoop) {
+  // If we already modified the range of this for loop, don't do any further
+  // updates on this iteration.
+  // FIXME: Once Replacements can detect conflicting edits, replace this
+  // implementation and rely on conflicting edit detection instead.
+  if (ReplacedVarRanges->count(TheLoop)) {
+    ++*DeferredChanges;
+    return "";
+  }
+
+  ParentFinder->gatherAncestors(Context->getTranslationUnitDecl());
+  // Ensure that we do not try to move an expression dependent on a local
+  // variable declared inside the loop outside of it!
+  DependencyFinderASTVisitor
+      DependencyFinder(&ParentFinder->getStmtToParentStmtMap(),
+                       &ParentFinder->getDeclToParentStmtMap(),
+                       ReplacedVarRanges, TheLoop);
+
+  // Not all of these are actually deferred changes.
+  // FIXME: Determine when the external dependency isn't an expression converted
+  // by another loop.
+  if (DependencyFinder.dependsOnInsideVariable(ContainerExpr)) {
+    ++*DeferredChanges;
+    return "";
+  }
+  if (ConfidenceLevel.get() < RequiredConfidenceLevel) {
+    ++*RejectedChanges;
+    return "";
+  }
+
+  StringRef ContainerString =
+      getStringFromRange(Context->getSourceManager(), Context->getLangOpts(),
+                         ContainerExpr->getSourceRange());
+  // In case someone is using an evil macro, reject this change.
+  if (ContainerString.empty())
+    ++*RejectedChanges;
+  return ContainerString;
+}
+
+/// \brief Given that we have verified that the loop's header appears to be
+/// convertible, run the complete analysis on the loop to determine if the
+/// loop's body is convertible.
+void LoopFixer::findAndVerifyUsages(ASTContext *Context,
+                                    const VarDecl *LoopVar,
+                                    const VarDecl *EndVar,
+                                    const Expr *ContainerExpr,
+                                    const Expr *BoundExpr,
+                                    bool ContainerNeedsDereference,
+                                    const ForStmt *TheLoop,
+                                    Confidence ConfidenceLevel) {
+  ForLoopIndexUseVisitor Finder(Context, LoopVar, EndVar, ContainerExpr,
+                                BoundExpr, ContainerNeedsDereference);
+  if (ContainerExpr) {
+    ComponentFinderASTVisitor ComponentFinder;
+    ComponentFinder.findExprComponents(ContainerExpr->IgnoreParenImpCasts());
+    Finder.addComponents(ComponentFinder.getComponents());
+  }
+
+  if (!Finder.findAndVerifyUsages(TheLoop->getBody()))
+    return;
+
+  ConfidenceLevel.lowerTo(Finder.getConfidenceLevel());
+  if (FixerKind == LFK_Array) {
+    // The array being indexed by IndexVar was discovered during traversal.
+    ContainerExpr = Finder.getContainerIndexed()->IgnoreParenImpCasts();
+    // Very few loops are over expressions that generate arrays rather than
+    // array variables. Consider loops over arrays that aren't just represented
+    // by a variable to be risky conversions.
+ if (!getReferencedVariable(ContainerExpr) && + !isDirectMemberExpr(ContainerExpr)) + ConfidenceLevel.lowerTo(TCK_Risky); + } + + std::string ContainerString = + checkDeferralsAndRejections(Context, ContainerExpr, + ConfidenceLevel, TheLoop); + if (ContainerString.empty()) + return; + + doConversion(Context, LoopVar, getReferencedVariable(ContainerExpr), + ContainerString, Finder.getUsages(), + Finder.getAliasDecl(), TheLoop, ContainerNeedsDereference); + ++*AcceptedChanges; +} + +/// \brief The LoopFixer callback, which determines if loops discovered by the +/// matchers are convertible, printing information about the loops if so. +void LoopFixer::run(const MatchFinder::MatchResult &Result) { + const BoundNodes &Nodes = Result.Nodes; + Confidence ConfidenceLevel(TCK_Safe); + ASTContext *Context = Result.Context; + const ForStmt *TheLoop = Nodes.getStmtAs(LoopName); + + if (!Context->getSourceManager().isFromMainFile(TheLoop->getForLoc())) + return; + + // Check that we have exactly one index variable and at most one end variable. + const VarDecl *LoopVar = Nodes.getDeclAs(IncrementVarName); + const VarDecl *CondVar = Nodes.getDeclAs(ConditionVarName); + const VarDecl *InitVar = Nodes.getDeclAs(InitVarName); + if (!areSameVariable(LoopVar, CondVar) || !areSameVariable(LoopVar, InitVar)) + return; + const VarDecl *EndVar = Nodes.getDeclAs(EndVarName); + const VarDecl *ConditionEndVar = + Nodes.getDeclAs(ConditionEndVarName); + if (EndVar && !areSameVariable(EndVar, ConditionEndVar)) + return; + + // If the end comparison isn't a variable, we can try to work with the + // expression the loop variable is being tested against instead. + const CXXMemberCallExpr *EndCall = + Nodes.getStmtAs(EndCallName); + const Expr *BoundExpr = Nodes.getStmtAs(ConditionBoundName); + // If the loop calls end()/size() after each iteration, lower our confidence + // level. 
+ if (FixerKind != LFK_Array && !EndVar) + ConfidenceLevel.lowerTo(TCK_Reasonable); + + const Expr *ContainerExpr = NULL; + bool ContainerNeedsDereference = false; + // FIXME: Try to put most of this logic inside a matcher. Currently, matchers + // don't allow the right-recursive checks in digThroughConstructors. + if (FixerKind == LFK_Iterator) + ContainerExpr = findContainer(Context, LoopVar->getInit(), + EndVar ? EndVar->getInit() : EndCall, + &ContainerNeedsDereference); + else if (FixerKind == LFK_PseudoArray) { + if (!EndCall) + return; + ContainerExpr = EndCall->getImplicitObjectArgument(); + const MemberExpr *Member = dyn_cast(EndCall->getCallee()); + if (!Member) + return; + ContainerNeedsDereference = Member->isArrow(); + } + // We must know the container or an array length bound. + if (!ContainerExpr && !BoundExpr) + return; + + findAndVerifyUsages(Context, LoopVar, EndVar, ContainerExpr, BoundExpr, + ContainerNeedsDereference, TheLoop, ConfidenceLevel); +} + +} // namespace loop_migrate +} // namespace clang Index: loop-convert/LoopConvert.cpp =================================================================== --- /dev/null +++ loop-convert/LoopConvert.cpp @@ -0,0 +1,137 @@ +//===-- loop-convert/LoopConvert.cpp - C++11 For loop migration -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a tool that migrates for loops to take advantage of the +// range-basead syntax new to C++11. +// +// Usage: +// loop-convert ... +// +// Where is a CMake build directory containing a file named +// compile_commands.json. +// +// ... specify the pahs of files in the CMake source tree, with the same +// requirements as other tools built on LibTooling. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoopActions.h"
+#include "LoopMatchers.h"
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/Tooling.h"
+#include "clang/Tooling/Refactoring.h"
+
+using clang::ast_matchers::MatchFinder;
+namespace cl = llvm::cl;
+using namespace clang::tooling;
+using namespace clang::loop_migrate;
+
+// Optional positional build directory; when empty, main() auto-detects the
+// compilation database from the first source path instead.
+static cl::opt<std::string> BuildPath(
+    cl::Positional,
+    cl::desc("<build-path>"));
+
+// One or more positional source files to process.
+static cl::list<std::string> SourcePaths(
+    cl::Positional,
+    cl::desc("<source0> [... <sourceN>]"),
+    cl::OneOrMore);
+
+// General options go here:
+static cl::opt<bool> CountOnly(
+    "count-only", cl::desc("Do not apply transformations; only count them."));
+
+// Minimum confidence a transformation must reach; defaults to TCK_Reasonable.
+static cl::opt<TranslationConfidenceKind> TransformationLevel(
+    cl::desc("Choose safety requirements for transformations:"),
+    cl::values(clEnumValN(TCK_Safe, "A0", "Enable safe transformations"),
+               clEnumValN(TCK_Reasonable, "A1",
+                          "Enable transformations that might change semantics "
+                          "(default)"),
+               clEnumValN(TCK_Risky, "A2",
+                          "Enable transformations that are likely "
+                          "to change semantics"),
+               clEnumValEnd),
+    cl::init(TCK_Reasonable));
+
+/// Tool entry point: syntax-checks the inputs, runs the three loop matchers
+/// with their LoopFixer callbacks, prints a summary, and syntax-checks again
+/// if any loop was converted. Returns the first non-zero tool result, else 0.
+int main(int argc, const char **argv) {
+  // A fixed compilation database given on the command line takes priority
+  // over auto-detection.
+  llvm::OwningPtr<CompilationDatabase> Compilations(
+      FixedCompilationDatabase::loadFromCommandLine(argc, argv));
+  cl::ParseCommandLineOptions(argc, argv);
+  if (!Compilations) {
+    std::string ErrorMessage;
+    Compilations.reset(
+        !BuildPath.empty() ?
+        CompilationDatabase::autoDetectFromDirectory(BuildPath, ErrorMessage) :
+        CompilationDatabase::autoDetectFromSource(SourcePaths[0],
+                                                  ErrorMessage));
+    if (!Compilations)
+      llvm::report_fatal_error(ErrorMessage);
+  }
+  ClangTool SyntaxTool(*Compilations, SourcePaths);
+
+  // First, let's check to make sure there were no errors.
+  if (int result =
+      SyntaxTool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>())) {
+    llvm::errs() << "Error compiling files.\n";
+    return result;
+  }
+
+  RefactoringTool LoopTool(*Compilations, SourcePaths);
+  // State shared by all three fixers: ancestor maps, generated names, replaced
+  // index variables, and the change counters reported below.
+  StmtAncestorASTVisitor ParentFinder;
+  StmtGeneratedVarNameMap GeneratedDecls;
+  ReplacedVarsMap ReplacedVars;
+  unsigned AcceptedChanges = 0;
+  unsigned DeferredChanges = 0;
+  unsigned RejectedChanges = 0;
+
+  MatchFinder Finder;
+  LoopFixer ArrayLoopFixer(&ParentFinder, &LoopTool.getReplacements(),
+                           &GeneratedDecls, &ReplacedVars, &AcceptedChanges,
+                           &DeferredChanges, &RejectedChanges,
+                           CountOnly, TransformationLevel, LFK_Array);
+  Finder.addMatcher(makeArrayLoopMatcher(), &ArrayLoopFixer);
+  LoopFixer IteratorLoopFixer(&ParentFinder, &LoopTool.getReplacements(),
+                              &GeneratedDecls, &ReplacedVars, &AcceptedChanges,
+                              &DeferredChanges, &RejectedChanges,
+                              CountOnly, TransformationLevel, LFK_Iterator);
+  Finder.addMatcher(makeIteratorLoopMatcher(), &IteratorLoopFixer);
+  LoopFixer PseudoArrayLoopFixer(&ParentFinder, &LoopTool.getReplacements(),
+                                 &GeneratedDecls, &ReplacedVars,
+                                 &AcceptedChanges, &DeferredChanges,
+                                 &RejectedChanges, CountOnly,
+                                 TransformationLevel, LFK_PseudoArray);
+  Finder.addMatcher(makePseudoArrayLoopMatcher(), &PseudoArrayLoopFixer);
+  if (int result = LoopTool.run(newFrontendActionFactory(&Finder))) {
+    llvm::errs() << "Error encountered during translation.\n";
+    return result;
+  }
+
+  llvm::outs() << "\nFor Loop Conversion:\n\t" << AcceptedChanges
+               << " converted loop(s)\n\t" << DeferredChanges
+               << " potentially conflicting change(s) deferred.\n\t"
+               << RejectedChanges << " change(s) rejected.\n";
+  if (DeferredChanges > 0)
+    llvm::outs() << "Re-run this tool to attempt applying deferred changes.\n";
+  if (RejectedChanges > 0)
+    llvm::outs() << "Re-run this tool with a lower required confidence level "
+                    "to apply rejected changes.\n";
+
+  if (AcceptedChanges > 0) {
+    // Check to see if the changes introduced any new errors.
+    ClangTool EndSyntaxTool(*Compilations, SourcePaths);
+    if (int result = EndSyntaxTool.run(
+            newFrontendActionFactory<clang::SyntaxOnlyAction>())) {
+      llvm::errs() << "Error compiling files after translation.\n";
+      return result;
+    }
+  }
+
+  return 0;
+}
Index: loop-convert/LoopMatchers.h
===================================================================
--- /dev/null
+++ loop-convert/LoopMatchers.h
@@ -0,0 +1,43 @@
+//===-- loop-convert/LoopMatchers.h - Matchers for for loops ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations of the matchers for use in migrating
+// C++ for loops. The matchers are responsible for checking the general shape of
+// the for loop, namely the init, condition, and increment portions.
+// Further analysis will be needed to confirm that the loop is in fact
+// convertible in the matcher callback.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOP_MATCHERS_H_
+#define _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOP_MATCHERS_H_
+
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+namespace clang {
+namespace loop_migrate {
+
+// Constants used for matcher name bindings
+extern const char LoopName[];
+extern const char ConditionBoundName[];
+extern const char ConditionVarName[];
+extern const char ConditionEndVarName[];
+extern const char IncrementVarName[];
+extern const char InitVarName[];
+// NOTE(review): no definition of EndExprName is visible in the
+// LoopMatchers.cpp hunk of this patch - confirm it is defined or still needed.
+extern const char EndExprName[];
+extern const char EndCallName[];
+extern const char EndVarName[];
+
+// Factory functions for the three loop shapes handled by the tool: loops over
+// arrays, iterator-based loops, and loops over array-like containers.
+ast_matchers::StatementMatcher makeArrayLoopMatcher();
+ast_matchers::StatementMatcher makeIteratorLoopMatcher();
+ast_matchers::StatementMatcher makePseudoArrayLoopMatcher();
+
+} //namespace loop_migrate
+} //namespace clang
+
+#endif //_LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_LOOP_MATCHERS_H_
Index: loop-convert/LoopMatchers.cpp
===================================================================
--- /dev/null
+++ loop-convert/LoopMatchers.cpp
@@ -0,0 +1,235 @@
+//===-- loop-convert/LoopMatchers.cpp - Matchers for for loops --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions of the matchers for use in migrating
+// C++ for loops.
+//
+//===----------------------------------------------------------------------===//
+#include "LoopMatchers.h"
+
+namespace clang {
+namespace loop_migrate {
+
+using namespace clang::ast_matchers;
+// Identifiers under which the matchers below bind AST nodes.
+const char LoopName[] = "forLoop";
+const char ConditionBoundName[] = "conditionBound";
+const char ConditionVarName[] = "conditionVar";
+const char IncrementVarName[] = "incrementVar";
+const char InitVarName[] = "initVar";
+const char EndCallName[] = "endCall";
+const char ConditionEndVarName[] = "conditionEndVar";
+const char EndVarName[] = "endVar";
+
+// shared matchers
+// Matches any type.
+static const TypeMatcher AnyType = anything();
+
+// Matches a reference to an integer-typed variable (ignoring parentheses and
+// implicit casts) and binds that variable as ConditionVarName.
+static const StatementMatcher IntegerComparisonMatcher =
+    expr(ignoringParenImpCasts(declRefExpr(to(
+        varDecl(hasType(isInteger())).bind(ConditionVarName)))));
+
+// Matches a variable initialized with the integer literal 0 and binds it as
+// InitVarName.
+static const DeclarationMatcher InitToZeroMatcher =
+    varDecl(hasInitializer(ignoringParenImpCasts(
+        integerLiteral(equals(0))))).bind(InitVarName);
+
+// Matches a reference to an integer-typed variable and binds that variable as
+// IncrementVarName.
+static const StatementMatcher IncrementVarMatcher =
+    declRefExpr(to(
+        varDecl(hasType(isInteger())).bind(IncrementVarName)));
+
+// FIXME: How best to document complicated matcher expressions? They're fairly
+// self-documenting...but there may be some unintuitive parts.
+
+/// \brief The matcher for loops over arrays.
+///
+/// In this general example, assuming 'j' and 'k' are of integral type:
+/// \code
+///   for (int i = 0; j < 3 + 2; ++k) { ... }
+/// \endcode
+/// The following string identifiers are bound to the parts of the AST:
+///   ConditionVarName: 'j' (as a VarDecl)
+///   ConditionBoundName: '3 + 2' (as an Expr)
+///   InitVarName: 'i' (as a VarDecl)
+///   IncrementVarName: 'k' (as a VarDecl)
+///   LoopName: The entire for loop (as a ForStmt)
+///
+/// Client code will need to make sure that:
+///   - The three index variables identified by the matcher are the same
+///     VarDecl.
+///   - The index variable is only used as an array index.
+///   - All arrays indexed by the loop are the same.
+StatementMatcher makeArrayLoopMatcher() {
+  // The loop bound: any integer-typed expression, bound as ConditionBoundName.
+  StatementMatcher ArrayBoundMatcher =
+      expr(hasType(isInteger())).bind(ConditionBoundName);
+
+  // The condition is accepted as either "index < bound" or "bound > index".
+  return forStmt(
+      hasLoopInit(declStmt(hasSingleDecl(InitToZeroMatcher))),
+      hasCondition(anyOf(binaryOperator(hasOperatorName("<"),
+                                        hasLHS(IntegerComparisonMatcher),
+                                        hasRHS(ArrayBoundMatcher)),
+                         binaryOperator(hasOperatorName(">"),
+                                        hasLHS(ArrayBoundMatcher),
+                                        hasRHS(IntegerComparisonMatcher)))),
+      hasIncrement(unaryOperator(hasOperatorName("++"),
+                                 hasUnaryOperand(IncrementVarMatcher))))
+      .bind(LoopName);
+}
+
+/// \brief The matcher used for iterator-based for loops.
+///
+/// This matcher is more flexible than array-based loops. It will match
+/// loops of the following textual forms (regardless of whether the
+/// iterator type is actually a pointer type or a class type):
+///
+/// Assuming f, g, and h are of type containerType::iterator,
+/// \code
+///   for (containerType::iterator it = container.begin(),
+///        e = createIterator(); f != g; ++h) { ... }
+///   for (containerType::iterator it = container.begin();
+///        f != anotherContainer.end(); ++h) { ... }
+/// \endcode
+/// The following string identifiers are bound to the parts of the AST:
+///   InitVarName: 'it' (as a VarDecl)
+///   ConditionVarName: 'f' (as a VarDecl)
+///   IncrementVarName: 'h' (as a VarDecl)
+///   LoopName: The entire for loop (as a ForStmt)
+///   In the first example only:
+///     EndVarName: 'e' (as a VarDecl)
+///     ConditionEndVarName: 'g' (as a VarDecl)
+///   In the second example only:
+///     EndCallName: 'anotherContainer.end()' (as a CXXMemberCallExpr)
+///
+/// Client code will need to make sure that:
+///   - The iterator variables 'it', 'f', and 'h' are the same
+///   - The two containers on which 'begin' and 'end' are called are the same
+///   - If the end iterator variable 'g' is defined, it is the same as 'e'
+StatementMatcher makeIteratorLoopMatcher() {
+  // NOTE(review): BeginCallMatcher is not referenced by the matcher returned
+  // below; the begin() check is done in the callback instead.
+  StatementMatcher BeginCallMatcher =
+      memberCallExpr(argumentCountIs(0), callee(methodDecl(hasName("begin"))));
+
+  DeclarationMatcher InitDeclMatcher =
+      varDecl(hasInitializer(anything())).bind(InitVarName);
+
+  DeclarationMatcher EndDeclMatcher =
+      varDecl(hasInitializer(anything())).bind(EndVarName);
+
+  StatementMatcher EndCallMatcher =
+      memberCallExpr(argumentCountIs(0), callee(methodDecl(hasName("end"))));
+
+  // The bound is either a variable (ConditionEndVarName) or a call to end()
+  // (EndCallName), the latter possibly wrapped in a materialized temporary.
+  StatementMatcher IteratorBoundMatcher =
+      expr(anyOf(ignoringParenImpCasts(declRefExpr(to(
+                     varDecl().bind(ConditionEndVarName)))),
+                 ignoringParenImpCasts(
+                     expr(EndCallMatcher).bind(EndCallName)),
+                 materializeTempExpr(ignoringParenImpCasts(
+                     expr(EndCallMatcher).bind(EndCallName)))));
+
+  StatementMatcher IteratorComparisonMatcher =
+      expr(ignoringParenImpCasts(declRefExpr(to(
+          varDecl().bind(ConditionVarName)))));
+
+  // Comparison through an overloaded operator!= (class-type iterators).
+  StatementMatcher OverloadedNEQMatcher = operatorCallExpr(
+      hasOverloadedOperatorName("!="),
+      argumentCountIs(2),
+      hasArgument(0, IteratorComparisonMatcher),
+      hasArgument(1, IteratorBoundMatcher));
+
+  return forStmt(
+      hasLoopInit(anyOf(
+          declStmt(declCountIs(2),
+                   containsDeclaration(0, InitDeclMatcher),
+                   containsDeclaration(1, EndDeclMatcher)),
+          declStmt(hasSingleDecl(InitDeclMatcher)))),
+      hasCondition(anyOf(
+          binaryOperator(hasOperatorName("!="),
+                         hasLHS(IteratorComparisonMatcher),
+                         hasRHS(IteratorBoundMatcher)),
+          binaryOperator(hasOperatorName("!="),
+                         hasLHS(IteratorBoundMatcher),
+                         hasRHS(IteratorComparisonMatcher)),
+          OverloadedNEQMatcher)),
+      hasIncrement(anyOf(
+          unaryOperator(hasOperatorName("++"),
+                        hasUnaryOperand(declRefExpr(to(
+                            varDecl(hasType(pointsTo(AnyType)))
+                            .bind(IncrementVarName))))),
+          operatorCallExpr(
+              hasOverloadedOperatorName("++"),
+              hasArgument(0, declRefExpr(to(
+                  varDecl().bind(IncrementVarName))))))))
+      .bind(LoopName);
+}
+
+/// \brief The matcher used for array-like containers (pseudoarrays).
+///
+/// This matcher is more flexible than array-based loops. It will match
+/// loops of the following textual forms, where the bound comes from a call to
+/// size() or length() on the container:
+///
+/// Assuming f, g, and h are of integral type,
+/// \code
+///   for (int i = 0, j = container.size(); f < g; ++h) { ... }
+///   for (int i = 0; f < container.size(); ++h) { ... }
+/// \endcode
+/// The following string identifiers are bound to the parts of the AST:
+///   InitVarName: 'i' (as a VarDecl)
+///   ConditionVarName: 'f' (as a VarDecl)
+///   IncrementVarName: 'h' (as a VarDecl)
+///   LoopName: The entire for loop (as a ForStmt)
+///   In the first example only:
+///     EndVarName: 'j' (as a VarDecl)
+///     ConditionEndVarName: 'g' (as a VarDecl)
+///   In the second example only:
+///     EndCallName: 'container.size()' (as a CXXMemberCallExpr)
+///
+/// Client code will need to make sure that:
+///   - The index variables 'i', 'f', and 'h' are the same
+///   - The container on which 'size()' is called is the container indexed
+///   - The index variable is only used in overloaded operator[] or
+///     container.at()
+///   - If the end variable 'g' is defined, it is the same as 'j'
+///   - The container's iterators would not be invalidated during the loop
+StatementMatcher makePseudoArrayLoopMatcher() {
+  StatementMatcher SizeCallMatcher =
+      memberCallExpr(argumentCountIs(0),
+                     callee(methodDecl(anyOf(hasName("size"),
+                                             hasName("length")))));
+
+  // The size call may appear directly or underneath an explicit cast.
+  StatementMatcher EndInitMatcher =
+      expr(anyOf(
+          ignoringParenImpCasts(expr(SizeCallMatcher).bind(EndCallName)),
+          explicitCastExpr(hasSourceExpression(ignoringParenImpCasts(
+              expr(SizeCallMatcher).bind(EndCallName))))));
+
+  DeclarationMatcher EndDeclMatcher =
+      varDecl(hasInitializer(EndInitMatcher)).bind(EndVarName);
+
+  StatementMatcher IndexBoundMatcher =
+      expr(anyOf(
+          ignoringParenImpCasts(declRefExpr(to(
+              varDecl(hasType(isInteger())).bind(ConditionEndVarName)))),
+          EndInitMatcher));
+
+  return forStmt(
+      hasLoopInit(anyOf(
+          declStmt(declCountIs(2),
+                   containsDeclaration(0, InitToZeroMatcher),
+                   containsDeclaration(1, EndDeclMatcher)),
+          declStmt(hasSingleDecl(InitToZeroMatcher)))),
+      hasCondition(anyOf(
+          binaryOperator(hasOperatorName("<"),
+                         hasLHS(IntegerComparisonMatcher),
+                         hasRHS(IndexBoundMatcher)),
+          binaryOperator(hasOperatorName(">"),
+                         hasLHS(IndexBoundMatcher),
+                         hasRHS(IntegerComparisonMatcher)))),
+      hasIncrement(unaryOperator(
+          hasOperatorName("++"),
+          hasUnaryOperand(IncrementVarMatcher))))
+      .bind(LoopName);
+}
+
+} // namespace loop_migrate
+} // namespace clang
Index: loop-convert/StmtAncestor.h
===================================================================
--- /dev/null
+++ loop-convert/StmtAncestor.h
@@ -0,0 +1,199 @@
+//===-- loop-convert/StmtAncestor.h - AST property visitors -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of several RecursiveASTVisitors used to
+// build and check data structures used in loop migration.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_STMT_ANCESTOR_H_
+#define _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_STMT_ANCESTOR_H_
+#include "clang/AST/RecursiveASTVisitor.h"
+
+namespace clang {
+namespace loop_migrate {
+
+/// A map used to walk the AST in reverse: maps child Stmt to parent Stmt.
+typedef llvm::DenseMap<const Stmt*, const Stmt*> StmtParentMap;
+
+/// A map used to walk the AST in reverse:
+/// maps VarDecl to the parent DeclStmt.
+typedef llvm::DenseMap<const VarDecl*, const DeclStmt*> DeclParentMap;
+
+/// A map used to track which variables have been removed by a refactoring pass.
+/// It maps the parent ForStmt to the removed index variable's VarDecl.
+typedef llvm::DenseMap<const ForStmt*, const VarDecl*> ReplacedVarsMap;
+
+/// A map used to remember the variable names generated in a Stmt
+typedef llvm::DenseMap<const Stmt*, std::string> StmtGeneratedVarNameMap;
+
+/// A vector used to store the AST subtrees of an Expr.
+// NOTE(review): the inline element count (16) was lost from this patch text
+// and is restored from memory - confirm against the original source.
+typedef llvm::SmallVector<const Expr*, 16> ComponentVector;
+
+/// \brief Class used to build the reverse AST properties needed to detect
+/// name conflicts and free variables.
+class StmtAncestorASTVisitor :
+  public RecursiveASTVisitor<StmtAncestorASTVisitor> {
+ public:
+  StmtAncestorASTVisitor() {
+    // Sentinel: the parent recorded for a topmost statement is NULL.
+    StmtStack.push_back(NULL);
+  }
+
+  /// \brief Run the analysis on the TranslationUnitDecl.
+  ///
+  /// In case we're running this analysis multiple times, don't repeat the work.
+  /// The traversal is skipped whenever StmtAncestors is already non-empty.
+  void gatherAncestors(const TranslationUnitDecl *TUD) {
+    if (StmtAncestors.empty())
+      TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
+  }
+
+  /// Accessor for StmtAncestors.
+  const StmtParentMap &getStmtToParentStmtMap() {
+    return StmtAncestors;
+  }
+
+  /// Accessor for DeclParents.
+  const DeclParentMap &getDeclToParentStmtMap() {
+    return DeclParents;
+  }
+
+  // The visitor base class calls the private Traverse*/Visit* overrides below.
+  friend class RecursiveASTVisitor<StmtAncestorASTVisitor>;
+
+ private:
+  StmtParentMap StmtAncestors;
+  DeclParentMap DeclParents;
+  // Stack of statements currently being traversed; back() is the parent of the
+  // statement visited next.
+  // NOTE(review): the inline element count (16) was lost from this patch text
+  // and is restored from memory - confirm against the original source.
+  llvm::SmallVector<const Stmt *, 16> StmtStack;
+
+  bool TraverseStmt(Stmt *Statement);
+  bool VisitDeclStmt(DeclStmt *Statement);
+};
+
+/// Class used to find the variables and member expressions on which an
+/// arbitrary expression depends.
+class ComponentFinderASTVisitor :
+  public RecursiveASTVisitor<ComponentFinderASTVisitor> {
+ public:
+  ComponentFinderASTVisitor() { }
+
+  /// Find the components of an expression and place them in a ComponentVector.
+  void findExprComponents(const Expr *SourceExpr) {
+    // The visitor interface takes non-const nodes.
+    Expr *E = const_cast<Expr *>(SourceExpr);
+    RecursiveASTVisitor<ComponentFinderASTVisitor>::TraverseStmt(E);
+  }
+
+  /// Accessor for Components.
+  const ComponentVector &getComponents() {
+    return Components;
+  }
+
+  friend class RecursiveASTVisitor<ComponentFinderASTVisitor>;
+
+ private:
+  ComponentVector Components;
+
+  bool VisitDeclRefExpr(DeclRefExpr *E);
+  bool VisitMemberExpr(MemberExpr *Member);
+};
+
+/// Class used to determine if an expression is dependent on a variable declared
+/// inside of the loop where it would be used.
+class DependencyFinderASTVisitor :
+  public RecursiveASTVisitor<DependencyFinderASTVisitor> {
+ public:
+  DependencyFinderASTVisitor(const StmtParentMap *StmtParents,
+                             const DeclParentMap *DeclParents,
+                             const ReplacedVarsMap *ReplacedVars,
+                             const Stmt *ContainingStmt) :
+    StmtParents(StmtParents), DeclParents(DeclParents),
+    ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars),
+    DependsOnInsideVariable(false) { }
+
+  /// \brief Run the analysis on Body, and return true iff the expression
+  /// depends on some variable declared within ContainingStmt.
+  ///
+  /// This is intended to protect against hoisting the container expression
+  /// outside of an inner context if part of that expression is declared in that
+  /// inner context.
+  ///
+  /// For example,
+  /// \code
+  ///   const int N = 10, M = 20;
+  ///   int arr[N][M];
+  ///   int getRow();
+  ///
+  ///   for (int i = 0; i < M; ++i) {
+  ///     int k = getRow();
+  ///     printf("%d:", arr[k][i]);
+  ///   }
+  /// \endcode
+  /// At first glance, this loop looks like it could be changed to
+  /// \code
+  ///   for (int elem : arr[k]) {
+  ///     int k = getRow();
+  ///     printf("%d:", elem);
+  ///   }
+  /// \endcode
+  /// But this is malformed, since `k` is used before it is defined!
+  ///
+  /// In order to avoid this, this class looks at the container expression
+  /// `arr[k]` and decides whether or not it contains a sub-expression declared
+  /// within the loop body.
+  bool dependsOnInsideVariable(const Stmt *Body) {
+    // Reset the result so the visitor object can be reused across queries.
+    DependsOnInsideVariable = false;
+    TraverseStmt(const_cast<Stmt *>(Body));
+    return DependsOnInsideVariable;
+  }
+
+  friend class RecursiveASTVisitor<DependencyFinderASTVisitor>;
+
+ private:
+  const StmtParentMap *StmtParents;
+  const DeclParentMap *DeclParents;
+  const Stmt *ContainingStmt;
+  const ReplacedVarsMap *ReplacedVars;
+  bool DependsOnInsideVariable;
+
+  bool VisitVarDecl(VarDecl *VD);
+  bool VisitDeclRefExpr(DeclRefExpr *DRE);
+};
+
+/// Class used to determine if any declarations used in a Stmt would conflict
+/// with a particular identifier. This search includes the names that don't
+/// actually appear in the AST (i.e. created by a refactoring tool) by including
+/// a map from Stmts to generated names associated with those stmts.
+class DeclFinderASTVisitor : public RecursiveASTVisitor<DeclFinderASTVisitor> {
+ public:
+  DeclFinderASTVisitor(const std::string &Name,
+                       const StmtGeneratedVarNameMap *GeneratedDecls) :
+    Name(Name), GeneratedDecls(GeneratedDecls), Found(false) { }
+
+  /// Attempts to find any usages of variables named Name in Body, returning
+  /// true when it is used in Body. This includes the generated loop variables
+  /// of ForStmts which have already been transformed.
+  bool findUsages(const Stmt *Body) {
+    // Reset the result so the visitor object can be reused across queries.
+    Found = false;
+    TraverseStmt(const_cast<Stmt *>(Body));
+    return Found;
+  }
+
+  friend class RecursiveASTVisitor<DeclFinderASTVisitor>;
+
+ private:
+  std::string Name;
+  /// GeneratedDecls keeps track of ForStmts which have been transformed,
+  /// mapping each modified ForStmt to the variable generated in the loop.
+  const StmtGeneratedVarNameMap *GeneratedDecls;
+  bool Found;
+
+  bool VisitForStmt(ForStmt *FS);
+  bool VisitNamedDecl(NamedDecl *ND);
+  bool VisitDeclRefExpr(DeclRefExpr *DRE);
+};
+
+} // namespace loop_migrate
+} // namespace clang
+#endif // _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_CONVERT_STMT_ANCESTOR_H_
Index: loop-convert/StmtAncestor.cpp
===================================================================
--- /dev/null
+++ loop-convert/StmtAncestor.cpp
@@ -0,0 +1,120 @@
+//===-- loop-convert/StmtAncestor.cpp - AST property visitors ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of several RecursiveASTVisitors used to
+// build and check data structures used in loop migration.
+//
+//===----------------------------------------------------------------------===//
+#include "StmtAncestor.h"
+
+namespace clang {
+namespace loop_migrate {
+
+/// \brief Tracks a stack of parent statements during traversal.
+///
+/// All this really does is inject push_back() before running
+/// RecursiveASTVisitor::TraverseStmt() and pop_back() afterwards. The Stmt atop
+/// the stack is the parent of the current statement (NULL for the topmost
+/// statement).
+///
+/// Always returns true; the result of the base-class traversal is ignored.
+bool StmtAncestorASTVisitor::TraverseStmt(Stmt *Statement) {
+  StmtAncestors.insert(std::make_pair(Statement, StmtStack.back()));
+  StmtStack.push_back(Statement);
+  RecursiveASTVisitor<StmtAncestorASTVisitor>::TraverseStmt(Statement);
+  StmtStack.pop_back();
+  return true;
+}
+
+/// \brief Keep track of the DeclStmt associated with each VarDecl.
+///
+/// Combined with StmtAncestors, this provides roughly the same information as
+/// Scope, as we can map a VarDecl to its DeclStmt, then walk up the parent tree
+/// using StmtAncestors.
+bool StmtAncestorASTVisitor::VisitDeclStmt(DeclStmt *Decls) {
+  // Declarations in the statement that are not VarDecls are skipped.
+  for (DeclStmt::const_decl_iterator I = Decls->decl_begin(),
+                                     E = Decls->decl_end(); I != E; ++I)
+    if (const VarDecl *VD = dyn_cast<VarDecl>(*I))
+      DeclParents.insert(std::make_pair(VD, Decls));
+  return true;
+}
+
+/// \brief Record the DeclRefExpr as part of the parent expression.
+bool ComponentFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *E) {
+  Components.push_back(E);
+  return true;
+}
+
+/// \brief Record the MemberExpr as part of the parent expression.
+bool ComponentFinderASTVisitor::VisitMemberExpr(MemberExpr *Member) {
+  Components.push_back(Member);
+  return true;
+}
+
+/// \brief Forward any DeclRefExprs to a check on the referenced variable
+/// declaration.
+///
+/// References to anything other than a VarDecl do not affect the result.
+bool DependencyFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *DRE) {
+  if (VarDecl *VD = dyn_cast_or_null<VarDecl>(DRE->getDecl()))
+    return VisitVarDecl(VD);
+  return true;
+}
+
+/// \brief Determine if this variable is declared inside the ContainingStmt.
+bool DependencyFinderASTVisitor::VisitVarDecl(VarDecl *VD) {
+  // Starting at the DeclStmt that declares VD, climb the parent chain. Reaching
+  // ContainingStmt means VD lives in an inner scope of the loop.
+  for (const Stmt *Ancestor = DeclParents->lookup(VD); Ancestor != NULL;
+       Ancestor = StmtParents->lookup(Ancestor)) {
+    if (Ancestor == ContainingStmt) {
+      DependsOnInsideVariable = true;
+      return false;
+    }
+  }
+
+  // Otherwise, VD counts as a dependency if an earlier conversion recorded it
+  // as a replaced index variable.
+  ReplacedVarsMap::const_iterator Entry = ReplacedVars->begin();
+  const ReplacedVarsMap::const_iterator Last = ReplacedVars->end();
+  while (Entry != Last) {
+    if (Entry->second == VD) {
+      DependsOnInsideVariable = true;
+      return false;
+    }
+    ++Entry;
+  }
+  return true;
+}
+
+/// \brief Check whether a name already generated for TheLoop collides with
+/// Name.
+///
+/// Sets Found and stops the traversal when GeneratedDecls maps TheLoop to Name.
+bool DeclFinderASTVisitor::VisitForStmt(ForStmt *TheLoop) {
+  const StmtGeneratedVarNameMap::const_iterator Generated =
+      GeneratedDecls->find(TheLoop);
+  if (Generated == GeneratedDecls->end() || Generated->second != Name)
+    return true;
+  Found = true;
+  return false;
+}
+
+/// \brief Treat Name as taken when a named declaration in the AST subtree has
+/// an identifier equal to it.
+///
+/// Declarations without an identifier never match.
+bool DeclFinderASTVisitor::VisitNamedDecl(NamedDecl *ND) {
+  const IdentifierInfo *Ident = ND->getIdentifier();
+  if (!Ident || Ident->getName() != Name)
+    return true;
+  Found = true;
+  return false;
+}
+
+/// \brief Forward any declaration references to the actual check on the
+/// referenced declaration.
+bool DeclFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *DRE) { + if (NamedDecl *ND = dyn_cast(DRE->getDecl())) + return VisitNamedDecl(ND); + return true; +} + +} // namespace clang +} // namespace for_migrate Index: loop-convert/VariableNaming.h =================================================================== --- /dev/null +++ loop-convert/VariableNaming.h @@ -0,0 +1,59 @@ +//===-- loop-convert/VariableNaming.h - Gererate variable names -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the VariableNamer class, which is +// responsible for generating new variable names and ensuring that they do not +// conflict with existing ones. +// +//===----------------------------------------------------------------------===// +#ifndef _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_VARIABLE_NAMING_H_ +#define _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_VARIABLE_NAMING_H_ + +#include "StmtAncestor.h" +#include "clang/AST/ASTContext.h" + +namespace clang { +namespace loop_migrate { + +/// \brief Create names for generated variables within a particular statement. +/// +/// VariableNamer uses a DeclContext as a reference point, checking for any +/// conflicting declarations higher up in the context or within SourceStmt. +/// It creates a variable name using hints from a source container and the old +/// index, if they exist. +class VariableNamer { + public: + VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls, + const StmtParentMap *ReverseAST, const Stmt *SourceStmt, + const VarDecl *OldIndex, const VarDecl *TheContainer) : + GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), + SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer) { } + + /// \brief Generate a new index name. 
+ /// + /// Generates the name to be used for an inserted iterator. It relies on + /// declarationExists() to determine that there are no naming conflicts, and + /// tries to use some hints from the container name and the old index name. + std::string createIndexName(); + + private: + StmtGeneratedVarNameMap *GeneratedDecls; + const StmtParentMap *ReverseAST; + const Stmt *SourceStmt; + const VarDecl *OldIndex; + const VarDecl *TheContainer; + + // Determine whether or not a declaration that would conflict with Symbol + // exists in an outer context or in any statement contained in SourceStmt. + bool declarationExists(const StringRef Symbol); +}; + +} // namespace loop_migrate +} // namespace clang +#endif // _LLVM_TOOLS_CLANG_TOOLS_EXTRA_LOOP_VARIABLE_NAMING_H_ Index: loop-convert/VariableNaming.cpp =================================================================== --- /dev/null +++ loop-convert/VariableNaming.cpp @@ -0,0 +1,84 @@ +//===-- loop-convert/VariableNaming.cpp - Generate variable names ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the VariableNamer class, which is +// responsible for generating new variable names and ensuring that they do not +// conflict with existing ones. 
+// +//===----------------------------------------------------------------------===// +#include "VariableNaming.h" + +namespace clang { +namespace loop_migrate { + +std::string VariableNamer::createIndexName() { + // FIXME: Add in naming conventions to handle: + // - Uppercase/lowercase indices + // - How to handle conflicts + // - An interactive process for naming + std::string IteratorName; + std::string ContainerName; + if (TheContainer) + ContainerName = TheContainer->getName().str(); + + size_t Len = ContainerName.length(); + if (Len > 1 && ContainerName[Len - 1] == 's') + IteratorName = ContainerName.substr(0, Len - 1); + else + IteratorName = "elem"; + + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = ContainerName + "_" + OldIndex->getName().str(); + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = ContainerName + "_elem"; + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName += "_elem"; + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = "_elem_"; + + // Someone defeated my naming scheme... + while (declarationExists(IteratorName)) + IteratorName += "i"; + return IteratorName; +} + +/// \brief Determines whether or not the the name Symbol exists in LoopContext, +/// any of its parent contexts, or any of its child statements. +/// +/// We also check to see if the same identifier was generated by this loop +/// converter in a loop nested within SourceStmt. +bool VariableNamer::declarationExists(const StringRef Symbol) { + // Determine if the symbol was generated in a parent context. + for (const Stmt *S = SourceStmt; S != NULL; S = ReverseAST->lookup(S)) { + StmtGeneratedVarNameMap::const_iterator I = GeneratedDecls->find(S); + if (I != GeneratedDecls->end() && I->second == Symbol) + return true; + } + + // FIXME: Rather than detecting conflicts at their usages, we should check the + // parent context. 
+ // For some reason, lookup() always returns the pair (NULL, NULL) because its + // StoredDeclsMap is not initialized (i.e. LookupPtr.getInt() is false inside + // of DeclContext::lookup()). Why is this? + + // Finally, determine if the symbol was used in the loop or a child context. + DeclFinderASTVisitor DeclFinder(Symbol, GeneratedDecls); + return DeclFinder.findUsages(SourceStmt); +} + +} // namespace loop_migrate +} // namespace clang Index: test/CMakeLists.txt =================================================================== --- test/CMakeLists.txt +++ test/CMakeLists.txt @@ -22,7 +22,7 @@ clang clang-headers FileCheck count not # Individual tools we test. - remove-cstr-calls + remove-cstr-calls loop-convert ) add_lit_testsuite(check-clang-tools "Running the Clang extra tools' regression tests" Index: test/loop-convert/Inputs/negative-header.h =================================================================== --- /dev/null +++ test/loop-convert/Inputs/negative-header.h @@ -0,0 +1,14 @@ +#ifndef _CLANG_TOOLS_EXTRA_H_ +#define _CLANG_TOOLS_EXTRA_H_ + +// Single FileCheck line to make sure that no loops are converted. 
+// CHECK-NOT: for ({{.*[^:]:[^:].*}}) +static void loopInHeader() { + const int N = 10; + int arr[N]; + int sum = 0; + for (int i = 0; i < N; ++i) + sum += arr[i]; +} + +#endif //_CLANG_TOOLS_EXTRA_H_ Index: test/loop-convert/Inputs/structures.h =================================================================== --- /dev/null +++ test/loop-convert/Inputs/structures.h @@ -0,0 +1,140 @@ +#ifndef _LLVM_TOOLS_CLANG_TOOLS_TESTS_TOOLING_STRUCTURES_H_ +#define _LLVM_TOOLS_CLANG_TOOLS_TESTS_TOOLING_STRUCTURES_H_ + +extern "C" { +extern int printf(const char *restrict, ...); +} + +struct Val {int x; void g(); }; + +struct MutableVal { + void constFun(int) const; + void nonConstFun(int, int); + void constFun(MutableVal &) const; + void constParamFun(const MutableVal &) const; + void nonConstParamFun(const MutableVal &); + int x; +}; + +struct S { + typedef MutableVal *iterator; + typedef const MutableVal *const_iterator; + const_iterator begin() const; + const_iterator end() const; + iterator begin(); + iterator end(); +}; + +struct T { + struct iterator { + int& operator*(); + const int& operator*()const; + iterator& operator ++(); + bool operator!=(const iterator &other); + void insert(int); + int x; + }; + iterator begin(); + iterator end(); +}; + +struct U { + struct iterator { + Val& operator*(); + const Val& operator*()const; + iterator& operator ++(); + bool operator!=(const iterator &other); + Val *operator->(); + }; + iterator begin(); + iterator end(); + int x; +}; + +struct X { + S s; + T t; + U u; + S getS(); +}; + +template +class dependent{ + public: + struct iterator_base { + const ElemType& operator*()const; + iterator_base& operator ++(); + bool operator!=(const iterator_base &other) const; + const ElemType *operator->() const; + }; + + struct iterator : iterator_base { + ElemType& operator*(); + iterator& operator ++(); + ElemType *operator->(); + }; + + typedef iterator_base const_iterator; + const_iterator begin() const; + const_iterator end() const; + 
iterator begin(); + iterator end(); + unsigned size() const; + ElemType & operator[](unsigned); + const ElemType & operator[](unsigned) const; + ElemType & at(unsigned); + const ElemType & at(unsigned) const; + + // Intentionally evil. + dependent operator*(); + + void foo(); + void constFoo() const; +}; + +template +class doublyDependent{ + public: + struct Value { + First first; + Second second; + }; + + struct iterator_base { + const Value& operator*()const; + iterator_base& operator ++(); + bool operator!=(const iterator_base &other) const; + const Value *operator->() const; + }; + + struct iterator : iterator_base { + Value& operator*(); + Value& operator ++(); + Value *operator->(); + }; + + typedef iterator_base const_iterator; + const_iterator begin() const; + const_iterator end() const; + iterator begin(); + iterator end(); +}; + +template +class transparent { + public: + Contained *at(); + Contained *operator->(); + Contained operator*(); +}; + +template +struct Nested { + typedef IteratorType* iterator; + IteratorType *operator->(); + IteratorType operator*(); + iterator begin(); + iterator end(); +}; + +#endif // _LLVM_TOOLS_CLANG_TOOLS_TESTS_TOOLING_STRUCTURES_H_ Index: test/loop-convert/loop-convert-array.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-array.cpp @@ -0,0 +1,156 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: cp %t.cpp %t.base +// RUN: loop-convert -count-only . 
%t.cpp -- -I %S/Inputs > %T/out \ +// RUN: && FileCheck -check-prefix=COUNTONLY -input-file=%T/out %s \ +// RUN: && diff %t.cpp %t.base + +#include "structures.h" + +const int N = 6; +const int NMinusOne = N - 1; +int arr[N] = {1, 2, 3, 4, 5, 6}; +int (*pArr)[N] = &arr; + +void f() { + int sum = 0; + // Update the number of correctly converted loops as this test changes: + // COUNTONLY: 15 converted + // COUNTONLY-NEXT: 0 potentially conflicting + // COUNTONLY-NEXT: 0 change(s) rejected + + for (int i = 0; i < N; ++i) { + sum += arr[i]; + int k; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) { + // CHECK-NEXT: sum += [[VAR]]; + // CHECK-NEXT: int k; + // CHECK-NEXT: } + + for (int i = 0; i < N; ++i) { + printf("Fibonacci number is %d\n", arr[i]); + sum += arr[i] + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + for (int i = 0; i < N; ++i) { + int x = arr[i]; + int y = arr[i] + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: int x = [[VAR]]; + // CHECK-NEXT: int y = [[VAR]] + 2; + + for (int i = 0; i < N; ++i) { + int x = N; + x = arr[i]; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: int x = N; + // CHECK-NEXT: x = [[VAR]]; + + for (int i = 0; i < N; ++i) { + arr[i] += 1; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) { + // CHECK-NEXT: [[VAR]] += 1; + // CHECK-NEXT: } + + for (int i = 0; i < N; ++i) { + int x = arr[i] + 2; + arr[i] ++; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: int x = [[VAR]] + 2; + // CHECK-NEXT: [[VAR]] ++; + + for (int i = 0; i < N; ++i) { + arr[i] = 4 + arr[i]; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: [[VAR]] = 4 + [[VAR]]; + + for (int i = 0; i < NMinusOne + 1; ++i) { + sum += arr[i]; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) { + // CHECK-NEXT: sum += [[VAR]]; + // CHECK-NEXT: } + + for (int i = 0; i < N; ++i) { + 
printf("Fibonacci number %d has address %p\n", arr[i], &arr[i]); + sum += arr[i] + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) + // CHECK-NEXT: printf("Fibonacci number %d has address %p\n", [[VAR]], &[[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + Val teas[N]; + for (int i = 0; i < N; ++i) { + teas[i].g(); + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : teas) { + // CHECK-NEXT: [[VAR]].g(); + // CHECK-NEXT: } +} + +struct HasArr { + int Arr[N]; + Val ValArr[N]; + void implicitThis() { + for (int i = 0; i < N; ++i) { + printf("%d", Arr[i]); + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : Arr) { + // CHECK-NEXT: printf("%d", [[VAR]]); + // CHECK-NEXT: } + + for (int i = 0; i < N; ++i) { + printf("%d", ValArr[i].x); + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : ValArr) { + // CHECK-NEXT: printf("%d", [[VAR]].x); + // CHECK-NEXT: } + } + + void explicitThis() { + for (int i = 0; i < N; ++i) { + printf("%d", this->Arr[i]); + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : this->Arr) { + // CHECK-NEXT: printf("%d", [[VAR]]); + // CHECK-NEXT: } + + for (int i = 0; i < N; ++i) { + printf("%d", this->ValArr[i].x); + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : this->ValArr) { + // CHECK-NEXT: printf("%d", [[VAR]].x); + // CHECK-NEXT: } + } +}; + +// Loops whose bounds are value-dependent should not be converted. 
+template +void dependentExprBound() { + for (int i = 0; i < N; ++i) + arr[i] = 0; + // CHECK: for (int i = 0; i < N; ++i) + // CHECK-NEXT: arr[i] = 0; +} +template void dependentExprBound<20>(); + +void memberFunctionPointer() { + Val v; + void (Val::*mfpArr[N])(void) = { &Val::g }; + for (int i = 0; i < N; ++i) + (v.*mfpArr[i])(); + // CHECK: for (auto & [[VAR:[a-z_]+]] : mfpArr) + // CHECK-NEXT: (v.*[[VAR]])(); +} Index: test/loop-convert/loop-convert-confidence.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-confidence.cpp @@ -0,0 +1,36 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s +// RUN: loop-convert . %t.cpp -A2 -- -I %S/Inputs \ +// RUN: && FileCheck -check-prefix=RISKY -input-file=%t.cpp %s + +#include "structures.h" + +void f() { + const int N = 5; + const int M = 7; + int (*pArr)[N]; + int Arr[N][M]; + int sum = 0; + + for (int i = 0; i < M; ++i) { + sum += Arr[0][i]; + } + // CHECK: for (int i = 0; i < M; ++i) { + // CHECK-NEXT: sum += Arr[0][i]; + // CHECK-NEXT: } + // RISKY: for (auto & [[VAR:[a-z_]+]] : Arr[0]) { + // RISKY-NEXT: sum += [[VAR]]; + // RISKY-NEXT: } + + for (int i = 0; i < N; ++i) { + sum += (*pArr)[i]; + } + // RISKY: for (auto & [[VAR:[a-z_]+]] : *pArr) { + // RISKY-NEXT: sum += [[VAR]]; + // RISKY-NEXT: } + // CHECK: for (int i = 0; i < N; ++i) { + // CHECK-NEXT: sum += (*pArr)[i]; + // CHECK-NEXT: } +} Index: test/loop-convert/loop-convert-dependency.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-dependency.cpp @@ -0,0 +1,27 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . 
%t.cpp -- && FileCheck -input-file=%t.cpp %s + +void f() { + const int N = 6; + const int M = 8; + int arr[N][M]; + + for (int i = 0; i < N; ++i) { + int a = 0; + int b = arr[i][a]; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : arr) { + // CHECK-NEXT: int a = 0; + // CHECK-NEXT: int b = [[VAR]][a]; + // CHECK-NEXT: } + + for (int j = 0; j < M; ++j) { + int a = 0; + int b = arr[a][j]; + } + // CHECK: for (int j = 0; j < M; ++j) { + // CHECK-NEXT: int a = 0; + // CHECK-NEXT: int b = arr[a][j]; + // CHECK-NEXT: } +} Index: test/loop-convert/loop-convert-iterator.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-iterator.cpp @@ -0,0 +1,106 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: rm -rf %t.cpp + +#include "structures.h" + +void f() { + /// begin()/end() - based for loops here: + T t; + for (T::iterator it = t.begin(), e = t.end(); it != e; ++it) { + printf("I found %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]+&? ?}}[[VAR:[a-z_]+]] : t) + // CHECK-NEXT: printf("I found %d\n", [[VAR]]); + + T *pt; + for (T::iterator it = pt->begin(), e = pt->end(); it != e; ++it) { + printf("I found %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]+&? ?}}[[VAR:[a-z_]+]] : *pt) + // CHECK-NEXT: printf("I found %d\n", [[VAR]]); + + S s; + for (S::const_iterator it = s.begin(), e = s.end(); it != e; ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + S *ps; + for (S::const_iterator it = ps->begin(), e = ps->end(); it != e; ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? 
?}}[[VAR:[a-z_]+]] : *ps) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + for (S::const_iterator it = s.begin(), e = s.end(); it != e; ++it) { + printf("s has value %d\n", it->x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: printf("s has value %d\n", [[VAR]].x); + + for (S::iterator it = s.begin(), e = s.end(); it != e; ++it) { + it->x = 3; + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: [[VAR]].x = 3; + + for (S::iterator it = s.begin(), e = s.end(); it != e; ++it) { + (*it).x = 3; + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: ([[VAR]]).x = 3; + + for (S::iterator it = s.begin(), e = s.end(); it != e; ++it) { + it->nonConstFun(4, 5); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: [[VAR]].nonConstFun(4, 5); + + U u; + for (U::iterator it = u.begin(), e = u.end(); it != e; ++it) { + printf("s has value %d\n", it->x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: printf("s has value %d\n", [[VAR]].x); + + for (U::iterator it = u.begin(), e = u.end(); it != e; ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + U::iterator A; + for (U::iterator i = u.begin(), e = u.end(); i != e; ++i) + int k = A->x + i->x; + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: int k = A->x + [[VAR]].x; + + dependent v; + for (dependent::const_iterator it = v.begin(), e = v.end(); + it != e; ++it) { + printf("Fibonacci number is %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + + for (dependent::const_iterator it(v.begin()), e = v.end(); + it != e; ++it) { + printf("Fibonacci number is %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]*&? 
?}}[[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + + doublyDependent intmap; + for (doublyDependent::iterator it = intmap.begin(), e = intmap.end(); + it != e; ++it) { + printf("intmap[%d] = %d", it->first, it->second); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : intmap) + // CHECK-NEXT: printf("intmap[%d] = %d", [[VAR]].first, [[VAR]].second); + +} Index: test/loop-convert/loop-convert-naming.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-naming.cpp @@ -0,0 +1,68 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +const int N = 10; +int nums[N]; +int sum = 0; + +Val Arr[N]; +Val &func(Val &); + +void aliasing() { + // The extra blank braces are left as a placeholder for after the variable + // declaration is deleted. + for (int i = 0; i < N; ++i) { + Val &t = Arr[i]; { } + int y = t.x; + } + // CHECK: for (auto & t : Arr) + // CHECK-NEXT: { } + // CHECK-NEXT: int y = t.x; + + for (int i = 0; i < N; ++i) { + Val &t = Arr[i]; + int y = t.x; + int z = Arr[i].x + t.x; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : Arr) + // CHECK-NEXT: Val &t = [[VAR]]; + // CHECK-NEXT: int y = t.x; + // CHECK-NEXT: int z = [[VAR]].x + t.x; + + for (int i = 0; i < N; ++i) { + Val t = Arr[i]; + int y = t.x; + int z = Arr[i].x + t.x; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : Arr) + // CHECK-NEXT: Val t = [[VAR]]; + // CHECK-NEXT: int y = t.x; + // CHECK-NEXT: int z = [[VAR]].x + t.x; + + for (int i = 0; i < N; ++i) { + Val &t = func(Arr[i]); + int y = t.x; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : Arr) + // CHECK-NEXT: Val &t = func([[VAR]]); + // CHECK-NEXT: int y = t.x; +} + +void sameNames() { + int num = 0; + for (int i = 0; i < N; ++i) { + printf("Fibonacci number is %d\n", nums[i]); + sum += nums[i] + 2 + num; 
+ (void) nums[i]; + } + // CHECK: int num = 0; + // CHECK-NEXT: for (auto & [[VAR:[a-z_]+]] : nums) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2 + num; + // CHECK-NOT: (void) num; + // CHECK: } +} Index: test/loop-convert/loop-convert-negative-iterator.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-negative-iterator.cpp @@ -0,0 +1,161 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +// Single FileCheck line to make sure that no loops are converted. +// CHECK-NOT: for ({{.*[^:]:[^:].*}}) + +S s; +T t; +U u; + +struct BadBeginEnd : T { + iterator notBegin(); + iterator notEnd(); +}; + +void notBeginOrEnd() { + BadBeginEnd Bad; + for (T::iterator i = Bad.notBegin(), e = Bad.end(); i != e; ++i) + int k = *i; + + for (T::iterator i = Bad.begin(), e = Bad.notEnd(); i != e; ++i) + int k = *i; +} + +void badLoopShapes() { + for (T::iterator i = t.begin(), e = t.end(), f = e; i != e; ++i) + int k = *i; + + for (T::iterator i = t.begin(), e = t.end(); i != e; ) + int k = *i; + + for (T::iterator i = t.begin(), e = t.end(); ; ++i) + int k = *i; + + T::iterator outsideI; + T::iterator outsideE; + + for (; outsideI != outsideE ; ++outsideI) + int k = *outsideI; +} + +void iteratorArrayMix() { + int lower; + const int N = 6; + for (T::iterator i = t.begin(), e = t.end(); lower < N; ++i) + int k = *i; + + for (T::iterator i = t.begin(), e = t.end(); lower < N; ++lower) + int k = *i; +} + +struct ExtraConstructor : T::iterator { + ExtraConstructor(T::iterator, int); + explicit ExtraConstructor(T::iterator); +}; + +void badConstructor() { + for (T::iterator i = ExtraConstructor(t.begin(), 0), e = t.end(); + i != e; ++i) + int k = *i; + for (T::iterator i = ExtraConstructor(t.begin()), e = t.end(); i != e; ++i) + 
int k = *i; +} + +void iteratorMemberUsed() { + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + i.x = *i; + + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + int k = i.x + *i; + + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + int k = e.x + *i; +} + +void iteratorMethodCalled() { + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + i.insert(3); + + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + if (i != i) + int k = 3; +} + +void iteratorOperatorCalled() { + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + int k = *(++i); + + for (S::iterator i = s.begin(), e = s.end(); i != e; ++i) + MutableVal k = *(++i); +} + +void differentContainers() { + T other; + for (T::iterator i = t.begin(), e = other.end(); i != e; ++i) + int k = *i; + + for (T::iterator i = other.begin(), e = t.end(); i != e; ++i) + int k = *i; + + S otherS; + for (S::iterator i = s.begin(), e = otherS.end(); i != e; ++i) + MutableVal k = *i; + + for (S::iterator i = otherS.begin(), e = s.end(); i != e; ++i) + MutableVal k = *i; +} + +void wrongIterators() { + T::iterator other; + for (T::iterator i = t.begin(), e = t.end(); i != other; ++i) + int k = *i; +} + +struct EvilArrow : U { + // Please, no one ever write code like this. + U* operator->(); +}; + +void differentMemberAccessTypes() { + EvilArrow A; + for (EvilArrow::iterator i = A.begin(), e = A->end(); i != e; ++i) + Val k = *i; + for (EvilArrow::iterator i = A->begin(), e = A.end(); i != e; ++i) + Val k = *i; +} + +void f(const T::iterator &it, int); +void f(const T &it, int); +void g(T &it, int); + +void iteratorPassedToFunction() { + for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) + f(i, *i); +} + +// FIXME: Disallow this except for containers passed by value and/or const +// reference. Or maybe this is correct enough for any container? 
+void containerPassedToFunction() { +// for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) +// f(t, *i); +// for (T::iterator i = t.begin(), e = t.end(); i != e; ++i) +// g(t, *i); +} + +// FIXME: These tests can be removed if this tool ever does enough analysis to +// decide that this is a safe transformation. +// Until then, we don't want it applied. +void iteratorDefinedOutside() { + T::iterator theEnd = t.end(); + for (T::iterator i = t.begin(); i != theEnd; ++i) + int k = *i; + + T::iterator theBegin = t.begin(); + for (T::iterator e = t.end(); theBegin != e; ++theBegin) + int k = *theBegin; +} Index: test/loop-convert/loop-convert-negative-multi-end-call.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-negative-multi-end-call.cpp @@ -0,0 +1,65 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert -A0 . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +// Single FileCheck line to make sure that no loops are converted. 
+// CHECK-NOT: for ({{.*[^:]:[^:].*}}) + +S s; +T t; +U u; + +void multipleEnd() { + for (S::iterator i = s.begin(); i != s.end(); ++i) + MutableVal k = *i; + + for (T::iterator i = t.begin(); i != t.end(); ++i) + int k = *i; + + for (U::iterator i = u.begin(); i != u.end(); ++i) + Val k = *i; +} + +void f(X); +void f(S); +void f(T); + +void complexContainer() { + X x; + for (S::iterator i = x.s.begin(), e = x.s.end(); i != e; ++i) { + f(x); + MutableVal k = *i; + } + + for (T::iterator i = x.t.begin(), e = x.t.end(); i != e; ++i) { + f(x); + int k = *i; + } + + for (S::iterator i = x.s.begin(), e = x.s.end(); i != e; ++i) { + f(x.s); + MutableVal k = *i; + } + + for (T::iterator i = x.t.begin(), e = x.t.end(); i != e; ++i) { + f(x.t); + int k = *i; + } + + for (S::iterator i = x.getS().begin(), e = x.getS().end(); i != e; ++i) { + f(x.getS()); + MutableVal k = *i; + } + + X exes[5]; + int index = 0; + + for (S::iterator i = exes[index].getS().begin(), + e = exes[index].getS().end(); i != e; ++i) { + index++; + MutableVal k = *i; + } +} Index: test/loop-convert/loop-convert-negative-pseudoarray.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-negative-pseudoarray.cpp @@ -0,0 +1,130 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert -A1 . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +// Single FileCheck line to make sure that no loops are converted. 
+// CHECK-NOT: for ({{.*[^:]:[^:].*}}) + +const int N = 6; +dependent v; +dependent *pv; + +transparent > cv; +int sum = 0; + +// Checks for the index start and end: +void indexStartAndEnd() { + for (int i = 0; i < v.size() + 1; ++i) + sum += v[i]; + + for (int i = 0; i < v.size() - 1; ++i) + sum += v[i]; + + for (int i = 1; i < v.size(); ++i) + sum += v[i]; + + for (int i = 1; i < v.size(); ++i) + sum += v[i]; + + for (int i = 0; ; ++i) + sum += (*pv)[i]; +} + +// Checks for invalid increment steps: +void increment() { + for (int i = 0; i < v.size(); --i) + sum += v[i]; + + for (int i = 0; i < v.size(); i) + sum += v[i]; + + for (int i = 0; i < v.size();) + sum += v[i]; + + for (int i = 0; i < v.size(); i += 2) + sum ++; +} + +// Checks to make sure that the index isn't used outside of the container: +void indexUse() { + for (int i = 0; i < v.size(); ++i) + v[i] += 1 + i; +} + +// Checks for incorrect loop variables. +void mixedVariables() { + int badIndex; + for (int i = 0; badIndex < v.size(); ++i) + sum += v[i]; + + for (int i = 0; i < v.size(); ++badIndex) + sum += v[i]; + + for (int i = 0; badIndex < v.size(); ++badIndex) + sum += v[i]; + + for (int i = 0; badIndex < v.size(); ++badIndex) + sum += v[badIndex]; +} + +// Checks for an array indexed in addition to the container. +void multipleArrays() { + int badArr[N]; + + for (int i = 0; i < v.size(); ++i) + sum += v[i] + badArr[i]; + + for (int i = 0; i < v.size(); ++i) + sum += badArr[i]; + + for (int i = 0; i < v.size(); ++i) { + int k = badArr[i]; + sum += k + 2; + } + + for (int i = 0; i < v.size(); ++i) { + int k = badArr[i]; + sum += v[i] + k; + } +} + +// Checks for multiple containers being indexed container. 
+void multipleContainers() { + dependent badArr; + + for (int i = 0; i < v.size(); ++i) + sum += v[i] + badArr[i]; + + for (int i = 0; i < v.size(); ++i) + sum += badArr[i]; + + for (int i = 0; i < v.size(); ++i) { + int k = badArr[i]; + sum += k + 2; + } + + for (int i = 0; i < v.size(); ++i) { + int k = badArr[i]; + sum += v[i] + k; + } +} + +// Check to make sure that dereferenced pointers-to-containers behave nicely +void derefContainer() { + // Note the dependent::operator*() returns another dependent. + // This test makes sure that we don't allow an arbitrary number of *'s. + for (int i = 0; i < pv->size(); ++i) + sum += (**pv).at(i); + + for (int i = 0; i < pv->size(); ++i) + sum += (**pv)[i]; +} + +void wrongEnd() { + int bad; + for (int i = 0, e = v.size(); i < bad; ++i) + sum += v[i]; +} Index: test/loop-convert/loop-convert-negative.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-negative.cpp @@ -0,0 +1,125 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %S/Inputs/negative-header.h > \ +// RUN: %T/negative-header.h +// RUN: loop-convert . %t.cpp -- -I %S/Inputs/ \ +// RUN: && FileCheck -input-file=%t.cpp %s \ +// RUN: && FileCheck -input-file=%T/negative-header.h \ +// RUN: %S/Inputs/negative-header.h + +#include "negative-header.h" +#include "structures.h" + +// Single FileCheck line to make sure that no loops are converted. 
+// CHECK-NOT: for ({{.*[^:]:[^:].*}}) + +const int N = 6; +int arr[N] = {1, 2, 3, 4, 5, 6}; +int (*pArr)[N] = &arr; +int sum = 0; + +// Checks for the index start and end: +void indexStartAndEnd() { + for (int i = 0; i < N + 1; ++i) + sum += arr[i]; + + for (int i = 0; i < N - 1; ++i) + sum += arr[i]; + + for (int i = 1; i < N; ++i) + sum += arr[i]; + + for (int i = 1; i < N; ++i) + sum += arr[i]; + + for (int i = 0; ; ++i) + sum += (*pArr)[i]; +} + +// Checks for invalid increment steps: +void increment() { + for (int i = 0; i < N; --i) + sum += arr[i]; + + for (int i = 0; i < N; i) + sum += arr[i]; + + for (int i = 0; i < N;) + sum += arr[i]; + + for (int i = 0; i < N; i += 2) + sum ++; +} + +// Checks to make sure that the index isn't used outside of the array: +void indexUse() { + for (int i = 0; i < N; ++i) + arr[i] += 1 + i; +} + +// Check for loops that don't mention arrays +void noArray() { + for (int i = 0; i < N; ++i) + sum += i; + + for (int i = 0; i < N; ++i) { } + + for (int i = 0; i < N; ++i) ; +} + +// Checks for incorrect loop variables. +void mixedVariables() { + int badIndex; + for (int i = 0; badIndex < N; ++i) + sum += arr[i]; + + for (int i = 0; i < N; ++badIndex) + sum += arr[i]; + + for (int i = 0; badIndex < N; ++badIndex) + sum += arr[i]; + + for (int i = 0; badIndex < N; ++badIndex) + sum += arr[badIndex]; +} + +// Checks for multiple arrays indexed. 
+void multipleArrays() { + int badArr[N]; + + for (int i = 0; i < N; ++i) + sum += arr[i] + badArr[i]; + + for (int i = 0; i < N; ++i) { + int k = badArr[i]; + sum += arr[i] + k; + } +} + +struct HasArr { + int Arr[N]; + Val ValArr[N]; +}; + +struct HasIndirectArr { + HasArr HA; + void implicitThis() { + for (int i = 0; i < N; ++i) { + printf("%d", HA.Arr[i]); + } + + for (int i = 0; i < N; ++i) { + printf("%d", HA.ValArr[i].x); + } + } + + void explicitThis() { + for (int i = 0; i < N; ++i) { + printf("%d", this->HA.Arr[i]); + } + + for (int i = 0; i < N; ++i) { + printf("%d", this->HA.ValArr[i].x); + } + } +}; Index: test/loop-convert/loop-convert-nesting.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-nesting.cpp @@ -0,0 +1,58 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +void f() { + const int N = 10; + const int M = 15; + Val Arr[N]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + int k = Arr[i].x + Arr[j].x; + // The repeat is there to allow FileCheck to make sure the two variable + // names aren't the same. + int l = Arr[i].x + Arr[j].x; + } + } + // CHECK: for (auto & [[VAR:[a-zA-Z_]+]] : Arr) + // CHECK-NEXT: for (auto & [[INNERVAR:[a-zA-Z_]+]] : Arr) + // CHECK-NEXT: int k = [[VAR]].x + [[INNERVAR]].x; + // CHECK-NOT: int l = [[VAR]].x + [[VAR]].x; + + Val Nest[N][M]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + printf("Got item %d", Nest[i][j].x); + } + } + // The inner loop is also convertible, but doesn't need to be converted + // immediately. Update this test when that changes! + // CHECK: for (auto & [[VAR:[a-zA-Z_]+]] : Nest) + // CHECK-NEXT: for (int j = 0; j < M; ++j) + // CHECK-NEXT: printf("Got item %d", [[VAR]][j].x); + + // Note that the order of M and N is switched for this test.
+ for (int j = 0; j < M; ++j) { + for (int i = 0; i < N; ++i) { + printf("Got item %d", Nest[i][j].x); + } + } + // CHECK-NOT: for (auto & {{[a-zA-Z_]+}} : Nest[i]) + // CHECK: for (int j = 0; j < M; ++j) + // CHECK-NEXT: for (auto & [[VAR:[a-zA-Z_]+]] : Nest) + // CHECK-NEXT: printf("Got item %d", [[VAR]][j].x); + Nested<T> NestT; + for (Nested<T>::iterator I = NestT.begin(), E = NestT.end(); I != E; ++I) { + for (T::iterator TI = (*I).begin(), TE = (*I).end(); TI != TE; ++TI) { + printf("%d", *TI); + } + } + // The inner loop is also convertible, but doesn't need to be converted + // immediately. Update this test when that changes! + // CHECK: for (auto & [[VAR:[a-zA-Z_]+]] : NestT) { + // CHECK-NEXT: for (T::iterator TI = ([[VAR]]).begin(), TE = ([[VAR]]).end(); TI != TE; ++TI) { + // CHECK-NEXT: printf("%d", *TI); +} Index: test/loop-convert/loop-convert-nocompile.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-nocompile.cpp @@ -0,0 +1,23 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: || FileCheck -input-file=%t.cpp %s +// Note that this test expects the compilation to fail! + +void valid() { + const int arr[5]; + int sum = 0; + for (int i = 0; i < 5; ++i) { + sum += arr[i]; + } +} +void hasSyntaxError = 3; +// CHECK: void valid() { +// CHECK-NEXT: const int arr[5]; +// CHECK-NEXT: int sum = 0; +// CHECK-NEXT: for (int i = 0; i < 5; ++i) { +// CHECK-NEXT: sum += arr[i]; +// CHECK-NEXT: } +// CHECK-NEXT: } + +// CHECK-NEXT: void hasSyntaxError = 3; Index: test/loop-convert/loop-convert-pseudoarray.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-pseudoarray.cpp @@ -0,0 +1,68 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert .
%t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s +// RUN: rm -rf %t.cpp +#include "structures.h" + +const int N = 6; +dependent<int> v; +dependent<int> *pv; + +transparent<dependent<int> > cv; + +void f() { + int sum = 0; + for (int i = 0, e = v.size(); i < e; ++i) { + printf("Fibonacci number is %d\n", v[i]); + sum += v[i] + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + for (int i = 0, e = v.size(); i < e; ++i) { + printf("Fibonacci number is %d\n", v.at(i)); + sum += v.at(i) + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + for (int i = 0, e = pv->size(); i < e; ++i) { + printf("Fibonacci number is %d\n", pv->at(i)); + sum += pv->at(i) + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : *pv) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + // This test will fail if size() isn't called repeatedly, since it + // returns unsigned int, and 0 is deduced to be signed int. + // FIXME: Insert the necessary explicit conversion, or write out the types + // explicitly.
+ for (int i = 0; i < pv->size(); ++i) { + printf("Fibonacci number is %d\n", (*pv).at(i)); + sum += (*pv)[i] + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : *pv) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; + + for (int i = 0; i < cv->size(); ++i) { + printf("Fibonacci number is %d\n", cv->at(i)); + sum += cv->at(i) + 2; + } + // CHECK: for (auto & [[VAR:[a-z_]+]] : *cv) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + // CHECK-NEXT: sum += [[VAR]] + 2; +} + +// Check for loops whose bodies don't mention the container +void noContainer() { + for (auto i = 0; i < v.size(); ++i) { } + // CHECK: for (auto & [[VAR:[a-z_]+]] : v) { } + + for (auto i = 0; i < v.size(); ++i) ; + // CHECK: for (auto & [[VAR:[a-z_]+]] : v) ; +} Index: test/loop-convert/loop-convert-single-iterator.cpp =================================================================== --- /dev/null +++ test/loop-convert/loop-convert-single-iterator.cpp @@ -0,0 +1,118 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: rm -rf %t.cpp + +#include "structures.h" + +void complexContainer() { + X exes[5]; + int index = 0; + + for (S::iterator i = exes[index].getS().begin(), e = exes[index].getS().end(); i != e; ++i) { + MutableVal k = *i; + MutableVal j = *i; + } + // CHECK: for ({{[a-zA-Z_ ]+&? ?}}[[VAR:[a-z_]+]] : exes[index].getS()) + // CHECK-NEXT: MutableVal k = [[VAR]]; + // CHECK-NEXT: MutableVal j = [[VAR]]; +} + +void f() { + /// begin()/end() - based for loops here: + T t; + for (T::iterator it = t.begin(); it != t.end(); ++it) { + printf("I found %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]+&?
?}}[[VAR:[a-z_]+]] : t) + // CHECK-NEXT: printf("I found %d\n", [[VAR]]); + + T *pt; + for (T::iterator it = pt->begin(); it != pt->end(); ++it) { + printf("I found %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]+&? ?}}[[VAR:[a-z_]+]] : *pt) + // CHECK-NEXT: printf("I found %d\n", [[VAR]]); + + S s; + for (S::const_iterator it = s.begin(); it != s.end(); ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + S *ps; + for (S::const_iterator it = ps->begin(); it != ps->end(); ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : *ps) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + for (S::const_iterator it = s.begin(); it != s.end(); ++it) { + printf("s has value %d\n", it->x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: printf("s has value %d\n", [[VAR]].x); + + for (S::iterator it = s.begin(); it != s.end(); ++it) { + it->x = 3; + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: [[VAR]].x = 3; + + for (S::iterator it = s.begin(); it != s.end(); ++it) { + (*it).x = 3; + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: ([[VAR]]).x = 3; + + for (S::iterator it = s.begin(); it != s.end(); ++it) { + it->nonConstFun(4, 5); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : s) + // CHECK-NEXT: [[VAR]].nonConstFun(4, 5); + + U u; + for (U::iterator it = u.begin(); it != u.end(); ++it) { + printf("s has value %d\n", it->x); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: printf("s has value %d\n", [[VAR]].x); + + for (U::iterator it = u.begin(); it != u.end(); ++it) { + printf("s has value %d\n", (*it).x); + } + // CHECK: for ({{[a-zA-Z_ ]*&?
?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: printf("s has value %d\n", ([[VAR]]).x); + + U::iterator A; + for (U::iterator i = u.begin(); i != u.end(); ++i) + int k = A->x + i->x; + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : u) + // CHECK-NEXT: int k = A->x + [[VAR]].x; + + dependent<int> v; + for (dependent<int>::const_iterator it = v.begin(); + it != v.end(); ++it) { + printf("Fibonacci number is %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + + for (dependent<int>::const_iterator it(v.begin()); + it != v.end(); ++it) { + printf("Fibonacci number is %d\n", *it); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : v) + // CHECK-NEXT: printf("Fibonacci number is %d\n", [[VAR]]); + + doublyDependent<int, int> intmap; + for (doublyDependent<int, int>::iterator it = intmap.begin(); + it != intmap.end(); ++it) { + printf("intmap[%d] = %d", it->first, it->second); + } + // CHECK: for ({{[a-zA-Z_ ]*&? ?}}[[VAR:[a-z_]+]] : intmap) + // CHECK-NEXT: printf("intmap[%d] = %d", [[VAR]].first, [[VAR]].second); +} Index: test/loop-convert/negative-pseudoarray-extra.cpp =================================================================== --- /dev/null +++ test/loop-convert/negative-pseudoarray-extra.cpp @@ -0,0 +1,30 @@ +// RUN: rm -rf %t.cpp +// RUN: grep -Ev "//\s*[A-Z-]+:" %s > %t.cpp +// RUN: loop-convert -A1 . %t.cpp -- -I %S/Inputs \ +// RUN: && FileCheck -input-file=%t.cpp %s + +#include "structures.h" + +// Single FileCheck line to make sure that no loops are converted. +// CHECK-NOT: for ({{.*[^:]:[^:].*}}) + +const int N = 6; +dependent<int> v; +dependent<int> *pv; + +int sum = 0; + +// Checks to see that non-const member functions are not called on the container +// object. +// These could conceivably be allowed with a lower required confidence level.
+void memberFunctionCalled() { + for (int i = 0; i < v.size(); ++i) { + sum += v[i]; + v.foo(); + } + + for (int i = 0; i < v.size(); ++i) { + sum += v[i]; + dependent<int>::iterator it = v.begin(); + } +}