Index: include/clang/Lex/Lexer.h =================================================================== --- include/clang/Lex/Lexer.h +++ include/clang/Lex/Lexer.h @@ -466,6 +466,13 @@ const LangOptions &LangOpts, unsigned MaxLines = 0); + /// Finds the token that comes right after the given location. + /// + /// Returns the next token, or none if the location is inside a macro. + static Optional findNextToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts); + /// \brief Checks that the given token is the first token that occurs after /// the given location (this excludes comments and whitespace). Returns the /// location immediately after the specified token. If the token is not found Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -1212,18 +1212,12 @@ } } -/// \brief Checks that the given token is the first token that occurs after the -/// given location (this excludes comments and whitespace). Returns the location -/// immediately after the specified token. If the token is not found or the -/// location is inside a macro, the returned source location will be invalid. -SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, - tok::TokenKind TKind, - const SourceManager &SM, - const LangOptions &LangOpts, - bool SkipTrailingWhitespaceAndNewLine) { +Optional Lexer::findNextToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { if (Loc.isMacroID()) { if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) - return SourceLocation(); + return None; } Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); @@ -1234,7 +1228,7 @@ bool InvalidTemp = false; StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) - return SourceLocation(); + return None; const char *TokenBegin = File.data() + LocInfo.second; @@ -1244,15 +1238,25 @@ // Find the token. Token Tok; lexer.LexFromRawLexer(Tok); - if (Tok.isNot(TKind)) + return Tok; +} + +/// \brief Checks that the given token is the first token that occurs after the +/// given location (this excludes comments and whitespace). Returns the location +/// immediately after the specified token. If the token is not found or the +/// location is inside a macro, the returned source location will be invalid. +SourceLocation Lexer::findLocationAfterToken( + SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM, + const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) { + Optional Tok = findNextToken(Loc, SM, LangOpts); + if (!Tok || Tok->isNot(TKind)) return SourceLocation(); - SourceLocation TokenLoc = Tok.getLocation(); + SourceLocation TokenLoc = Tok->getLocation(); // Calculate how much whitespace needs to be skipped if any. unsigned NumWhitespaceChars = 0; if (SkipTrailingWhitespaceAndNewLine) { - const char *TokenEnd = SM.getCharacterData(TokenLoc) + - Tok.getLength(); + const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength(); unsigned char C = *TokenEnd; while (isHorizontalWhitespace(C)) { C = *(++TokenEnd); @@ -1269,7 +1273,7 @@ } } - return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); + return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars); } /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, Index: lib/Tooling/Refactoring/CMakeLists.txt =================================================================== --- lib/Tooling/Refactoring/CMakeLists.txt +++ lib/Tooling/Refactoring/CMakeLists.txt @@ -5,6 +5,7 @@ ASTSelectionRequirements.cpp AtomicChange.cpp Extract.cpp + SourceExtraction.cpp RefactoringActions.cpp Rename/RenamingAction.cpp Rename/SymbolOccurrences.cpp Index: lib/Tooling/Refactoring/Extract.cpp =================================================================== --- lib/Tooling/Refactoring/Extract.cpp +++ lib/Tooling/Refactoring/Extract.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Refactoring/Extract/Extract.h" +#include "SourceExtraction.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/Rewrite/Core/Rewriter.h" @@ -131,6 +132,8 @@ PP.SuppressLifetimeQualifiers = true; PP.SuppressUnwrittenScope = true; + ExtractionSemicolonPolicy Semicolons = ExtractionSemicolonPolicy::compute( + Code[Code.size() - 1], ExtractedRange, SM, LangOpts); AtomicChange Change(SM, ExtractedDeclLocation); // Create the replacement for the extracted declaration. { @@ -148,8 +151,8 @@ if (IsExpr && !ReturnType->isVoidType()) OS << "return "; OS << ExtractedCodeRewriter.getRewrittenText(ExtractedRange); - // FIXME: Compute the correct semicolon policy. - OS << ';'; + if (Semicolons.IsNeededInExtractedFunction) + OS << ';'; OS << "\n}\n\n"; auto Err = Change.insert(SM, ExtractedDeclLocation, OS.str()); if (Err) @@ -164,7 +167,8 @@ OS << DeclName << '('; // FIXME: Forward arguments. OS << ')'; - // FIXME: Add semicolon if needed. + if (Semicolons.IsNeededInOriginalFunction) + OS << ';'; auto Err = Change.replace( SM, CharSourceRange::getTokenRange(ExtractedRange), OS.str()); Index: lib/Tooling/Refactoring/SourceExtraction.h =================================================================== --- /dev/null +++ lib/Tooling/Refactoring/SourceExtraction.h @@ -0,0 +1,49 @@ +//===--- SourceExtraction.cpp - Clang refactoring library -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#define LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H + +#include "clang/Basic/LLVM.h" + +namespace clang { + +class LangOptions; +class SourceManager; +class SourceRange; +class Stmt; + +namespace tooling { + +/// Determines which semicolons should be inserted during extraction. +struct ExtractionSemicolonPolicy { + bool IsNeededInExtractedFunction; + bool IsNeededInOriginalFunction; + + /// Returns the semicolon insertion policy that is needed for extraction of + /// the given statement from the given source range. + static ExtractionSemicolonPolicy compute(const Stmt *S, + SourceRange &ExtractedRange, + const SourceManager &SM, + const LangOptions &LangOpts); + +private: + static ExtractionSemicolonPolicy neededInExtractedFunction() { + return {true, false}; + } + static ExtractionSemicolonPolicy neededInOriginalFunction() { + return {false, true}; + } + static ExtractionSemicolonPolicy neededInBoth() { return {true, true}; } +}; + +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H Index: lib/Tooling/Refactoring/SourceExtraction.cpp =================================================================== --- /dev/null +++ lib/Tooling/Refactoring/SourceExtraction.cpp @@ -0,0 +1,105 @@ +//===--- SourceExtraction.cpp - Clang refactoring library -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SourceExtraction.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtObjC.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +using namespace clang; + +namespace { + +/// Returns true if the token at the given location is a semicolon. +bool isSemicolonAtLocation(SourceLocation TokenLoc, const SourceManager &SM, + const LangOptions &LangOpts) { + return Lexer::getSourceText( + CharSourceRange::getTokenRange(TokenLoc, TokenLoc), SM, + LangOpts) == ";"; +} + +/// Returns true if there should be a semicolon after the given statement. +bool isSemicolonRequiredAfter(const Stmt *S) { + if (isa(S)) + return false; + if (const auto *If = dyn_cast(S)) + return isSemicolonRequiredAfter(If->getElse() ? If->getElse() + : If->getThen()); + if (const auto *While = dyn_cast(S)) + return isSemicolonRequiredAfter(While->getBody()); + if (const auto *For = dyn_cast(S)) + return isSemicolonRequiredAfter(For->getBody()); + if (const auto *CXXFor = dyn_cast(S)) + return isSemicolonRequiredAfter(CXXFor->getBody()); + if (const auto *ObjCFor = dyn_cast(S)) + return isSemicolonRequiredAfter(ObjCFor->getBody()); + switch (S->getStmtClass()) { + case Stmt::SwitchStmtClass: + case Stmt::CXXTryStmtClass: + case Stmt::ObjCAtSynchronizedStmtClass: + case Stmt::ObjCAutoreleasePoolStmtClass: + case Stmt::ObjCAtTryStmtClass: + return false; + default: + return true; + } +} + +/// Returns true if the two source locations are on the same line. +bool areOnSameLine(SourceLocation Loc1, SourceLocation Loc2, + const SourceManager &SM) { + return !Loc1.isMacroID() && !Loc2.isMacroID() && + SM.getSpellingLineNumber(Loc1) == SM.getSpellingLineNumber(Loc2); +} + +} // end anonymous namespace + +namespace clang { +namespace tooling { + +ExtractionSemicolonPolicy +ExtractionSemicolonPolicy::compute(const Stmt *S, SourceRange &ExtractedRange, + const SourceManager &SM, + const LangOptions &LangOpts) { + /// The extracted expression should be terminated with a ';'. The call to + /// the extracted function will replace this expression, so it won't need + /// a terminating ';'. + if (isa(S)) + return ExtractionSemicolonPolicy::neededInExtractedFunction(); + + /// Some statements don't need to be terminated with ';'. The call to the + /// extracted function will be a standalone statement, so it should be + /// terminated with a ';'. + bool NeedsSemi = isSemicolonRequiredAfter(S); + if (!NeedsSemi) + return ExtractionSemicolonPolicy::neededInOriginalFunction(); + + /// Some statements might end at ';'. The extraction will move that ';', so + /// the call to the extracted function should be terminated with a ';'. + SourceLocation End = ExtractedRange.getEnd(); + if (isSemicolonAtLocation(End, SM, LangOpts)) + return ExtractionSemicolonPolicy::neededInOriginalFunction(); + + /// Other statements should generally have a trailing ';'. We can try to find + /// it and move it together it with the extracted code. + Optional NextToken = Lexer::findNextToken(End, SM, LangOpts); + if (NextToken && NextToken->is(tok::semi) && + areOnSameLine(NextToken->getLocation(), End, SM)) { + ExtractedRange.setEnd(NextToken->getLocation()); + return ExtractionSemicolonPolicy::neededInOriginalFunction(); + } + + /// Otherwise insert semicolons in both places. + return ExtractionSemicolonPolicy::neededInBoth(); +} + +} // end namespace tooling +} // end namespace clang Index: test/Refactor/Extract/ExtractExprIntoFunction.cpp =================================================================== --- test/Refactor/Extract/ExtractExprIntoFunction.cpp +++ test/Refactor/Extract/ExtractExprIntoFunction.cpp @@ -20,10 +20,10 @@ // CHECK: 1 'astatement' results: // CHECK: static void extracted() { // CHECK-NEXT: int a = 1; -// CHECK-NEXT: int b = 2;;{{$}} +// CHECK-NEXT: int b = 2;{{$}} // CHECK-NEXT: }{{[[:space:]].*}} // CHECK-NEXT: void simpleExtractStmtNoCaptures() { -// CHECK-NEXT: /*range astatement=->+1:13*/extracted(){{$}} +// CHECK-NEXT: /*range astatement=->+1:13*/extracted();{{$}} // CHECK-NEXT: } Index: test/Refactor/Extract/ExtractionSemicolonPolicy.cpp =================================================================== --- /dev/null +++ test/Refactor/Extract/ExtractionSemicolonPolicy.cpp @@ -0,0 +1,192 @@ +// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++11 2>&1 | grep -v CHECK | FileCheck %s + +struct Rectangle { int width, height; }; + +void extractStatement(const Rectangle &r) { + /*range adeclstmt=->+0:59*/int area = r.width * r.height; +} +// CHECK: 1 'adeclstmt' results: +// CHECK: static void extracted() { +// CHECK-NEXT: int area = r.width * r.height;{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatement(const Rectangle &r) { +// CHECK-NEXT: /*range adeclstmt=->+0:59*/extracted();{{$}} +// CHECK-NEXT: } + +void extractStatementNoSemiIf(const Rectangle &r) { + /*range bextractif=->+2:4*/if (r.width) { + int x = r.height; + } +} +// CHECK: 1 'bextractif' results: +// CHECK: static void extracted() { +// CHECK-NEXT: if (r.width) { +// CHECK-NEXT: int x = r.height; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNoSemiIf(const Rectangle &r) { +// CHECK-NEXT: /*range bextractif=->+2:4*/extracted();{{$}} +// CHECK-NEXT: } + +void extractStatementDontExtraneousSemi(const Rectangle &r) { + /*range cextractif=->+2:4*/if (r.width) { + int x = r.height; + } ; +} //^ This semicolon shouldn't be extracted. +// CHECK: 1 'cextractif' results: +// CHECK: static void extracted() { +// CHECK-NEXT: if (r.width) { +// CHECK-NEXT: int x = r.height; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementDontExtraneousSemi(const Rectangle &r) { +// CHECK-NEXT: extracted(); ;{{$}} +// CHECK-NEXT: } + +void extractStatementNotSemiSwitch() { + /*range dextract=->+5:4*/switch (2) { + case 1: + break; + case 2: + break; + } +} +// CHECK: 1 'dextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: switch (2) { +// CHECK-NEXT: case 1: +// CHECK-NEXT: break; +// CHECK-NEXT: case 2: +// CHECK-NEXT: break; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNotSemiSwitch() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void extractStatementNotSemiWhile() { + /*range eextract=->+2:4*/while (true) { + int x = 0; + } +} +// CHECK: 1 'eextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: while (true) { +// CHECK-NEXT: int x = 0; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNotSemiWhile() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void extractStatementNotSemiFor() { + /*range fextract=->+1:4*/for (int i = 0; i < 10; ++i) { + } +} +// CHECK: 1 'fextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: for (int i = 0; i < 10; ++i) { +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNotSemiFor() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +struct XS { + int *begin() { return 0; } + int *end() { return 0; } +}; + +void extractStatementNotSemiRangedFor(XS xs) { + /*range gextract=->+1:4*/for (int i : xs) { + } +} +// CHECK: 1 'gextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: for (int i : xs) { +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNotSemiRangedFor(XS xs) { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void extractStatementNotSemiRangedTryCatch() { + /*range hextract=->+3:4*/try { int x = 0; } + catch (const int &i) { + int y = i; + } +} +// CHECK: 1 'hextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: try { int x = 0; } +// CHECK-NEXT: catch (const int &i) { +// CHECK-NEXT: int y = i; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractStatementNotSemiRangedTryCatch() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void extractCantFindSemicolon() { + /*range iextract=->+1:17*/do { + } while (true) + // Add a semicolon in both the extracted and original function as we don't + // want to extract the semicolon below. + ; +} +// CHECK: 1 'iextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: do { +// CHECK-NEXT: } while (true);{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractCantFindSemicolon() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: // +// CHECK-NEXT: // +// CHECK-NEXT: ; +// CHECK-NEXT: } + +void extractFindSemicolon() { + /*range jextract=->+1:17*/do { + } while (true) /*grab*/ ; +} +// CHECK: 1 'jextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: do { +// CHECK-NEXT: } while (true) /*grab*/ ;{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void extractFindSemicolon() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void call(); + +void careForNonCompoundSemicolons1() { + /*range kextract=->+1:11*/if (true) + call(); +} +// CHECK: 1 'kextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: if (true) +// CHECK-NEXT: call();{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void careForNonCompoundSemicolons1() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: } + +void careForNonCompoundSemicolons2() { + /*range lextract=->+3:1*/for (int i = 0; i < 10; ++i) + while (i != 0) + ; + // end right here111! +} +// CHECK: 1 'lextract' results: +// CHECK: static void extracted() { +// CHECK-NEXT: for (int i = 0; i < 10; ++i) +// CHECK-NEXT: while (i != 0) +// CHECK-NEXT: ;{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} +// CHECK-NEXT: void careForNonCompoundSemicolons2() { +// CHECK-NEXT: extracted();{{$}} +// CHECK-NEXT: // +// CHECK-NEXT: } Index: test/Refactor/Extract/ExtractionSemicolonPolicy.m =================================================================== --- /dev/null +++ test/Refactor/Extract/ExtractionSemicolonPolicy.m @@ -0,0 +1,56 @@ +// RUN: clang-refactor extract -selection=test:%s %s -- 2>&1 | grep -v CHECK | FileCheck %s + +@interface NSArray ++ (id)arrayWithObjects:(const id [])objects count:(unsigned long)cnt; +@end + +void extractStatementNoSemiObjCFor(NSArray *array) { + /*range astmt=->+2:4*/for (id i in array) { + int x = 0; + } +} +// CHECK: 1 'astmt' results: +// CHECK: static void extracted() { +// CHECK-NEXT: for (id i in array) { +// CHECK-NEXT: int x = 0; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} + +void extractStatementNoSemiSync() { + id lock; + /*range bstmt=->+2:4*/@synchronized(lock) { + int x = 0; + } +} +// CHECK: 1 'bstmt' results: +// CHECK: static void extracted() { +// CHECK-NEXT: @synchronized(lock) { +// CHECK-NEXT: int x = 0; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} + +void extractStatementNoSemiAutorel() { + /*range cstmt=->+2:4*/@autoreleasepool { + int x = 0; + } +} +// CHECK: 1 'cstmt' results: +// CHECK: static void extracted() { +// CHECK-NEXT: @autoreleasepool { +// CHECK-NEXT: int x = 0; +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}} + +void extractStatementNoSemiTryFinalllllly() { + /*range dstmt=->+3:4*/@try { + int x = 0; + } @finally { + } +} +// CHECK: 1 'dstmt' results: +// CHECK: static void extracted() { +// CHECK-NEXT: @try { +// CHECK-NEXT: int x = 0; +// CHECK-NEXT: } @finally { +// CHECK-NEXT: }{{$}} +// CHECK-NEXT: }{{[[:space:]].*}}