Index: clang/docs/ClangFormatStyleOptions.rst =================================================================== --- clang/docs/ClangFormatStyleOptions.rst +++ clang/docs/ClangFormatStyleOptions.rst @@ -1737,6 +1737,25 @@ +**IndentRequires** (``bool``) + Indent the requires clause in a template. + + .. code-block:: c++ + + true: + template <typename It> + requires Iterator<It> + void sort(It begin, It end) { + //.... + } + + false: + template <typename It> + requires Iterator<It> + void sort(It begin, It end) { + //.... + } + **IndentWidth** (``unsigned``) The number of columns to use for indentation. Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -297,6 +297,11 @@ bar(); }); +- Option ``IndentRequires`` has been added to indent the ``requires`` keyword + in templates. + + Support in clang-format for concepts has been improved. + libclang -------- Index: clang/include/clang/Format/Format.h =================================================================== --- clang/include/clang/Format/Format.h +++ clang/include/clang/Format/Format.h @@ -1465,6 +1465,24 @@ /// The preprocessor directive indenting style to use. PPDirectiveIndentStyle IndentPPDirectives; + /// Indent the requires clause in a template. + /// \code + /// true: + /// template <typename It> + /// requires Iterator<It> + /// void sort(It begin, It end) { + /// //.... + /// } + /// + /// false: + /// template <typename It> + /// requires Iterator<It> + /// void sort(It begin, It end) { + /// //.... + /// } + /// \endcode + bool IndentRequires; + /// The number of columns to use for indentation. /// \code /// IndentWidth: 3 @@ -1984,8 +2002,8 @@ /// \endcode SBPO_ControlStatements, /// Same as ``SBPO_ControlStatements`` except this option doesn't apply to - /// ForEach macros. This is useful in projects where ForEach macros are - /// treated as function calls instead of control statements. + /// ForEach macros. 
This is useful in projects where ForEach macros are + /// treated as function calls instead of control statements. /// \code /// void f() { /// Q_FOREACH(...) { @@ -2249,7 +2267,8 @@ IndentCaseBlocks == R.IndentCaseBlocks && IndentGotoLabels == R.IndentGotoLabels && IndentPPDirectives == R.IndentPPDirectives && - IndentWidth == R.IndentWidth && Language == R.Language && + IndentRequires == R.IndentRequires && IndentWidth == R.IndentWidth && + Language == R.Language && IndentWrappedFunctionNames == R.IndentWrappedFunctionNames && JavaImportGroups == R.JavaImportGroups && JavaScriptQuotes == R.JavaScriptQuotes && Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -497,6 +497,7 @@ IO.mapOptional("IndentCaseBlocks", Style.IndentCaseBlocks); IO.mapOptional("IndentGotoLabels", Style.IndentGotoLabels); IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives); + IO.mapOptional("IndentRequires", Style.IndentRequires); IO.mapOptional("IndentWidth", Style.IndentWidth); IO.mapOptional("IndentWrappedFunctionNames", Style.IndentWrappedFunctionNames); @@ -808,6 +809,7 @@ LLVMStyle.IndentCaseBlocks = false; LLVMStyle.IndentGotoLabels = true; LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; + LLVMStyle.IndentRequires = false; LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.IndentWidth = 2; LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None; Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -39,6 +39,7 @@ TYPE(ConflictAlternative) \ TYPE(ConflictEnd) \ TYPE(ConflictStart) \ + TYPE(ConstraintJunctions) \ TYPE(CtorInitializerColon) \ TYPE(CtorInitializerComma) \ TYPE(DesignatedInitializerLSquare) \ @@ -458,6 +459,7 @@ case tok::kw_noexcept: case tok::kw_static_assert: case tok::kw___attribute: + case 
tok::kw_requires: return true; default: return false; Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -1309,7 +1309,7 @@ TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString, - TT_ObjCStringLiteral)) + TT_ObjCStringLiteral, TT_ConstraintJunctions)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1563,7 +1563,11 @@ !Current.Previous->is(tok::kw_operator)) { // not auto operator->() -> xxx; Current.Type = TT_TrailingReturnArrow; - + } else if (Current.is(tok::arrow) && Current.Previous && + Current.Previous->is(tok::r_brace)) { + // Concept implicit conversion constraint needs to be treated like + // a trailing return type ... } -> <type>. + Current.Type = TT_TrailingReturnArrow; } else if (isDeductionGuide(Current)) { // Deduction guides trailing arrow " A(...) -> A;". Current.Type = TT_TrailingReturnArrow; @@ -2739,6 +2743,14 @@ isKeywordWithCondition(*Right.MatchingParen->Previous)) return true; } + + // requires ( or requires( + if (Right.is(tok::l_paren) && Left.is(tok::kw_requires)) + return spaceRequiredBeforeParens(Right); + // requires clause Concept1 && Concept2 + if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier)) + return true; + if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) return (Right.is(TT_CastRParen) || (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) @@ -3466,6 +3478,12 @@ return true; } + // Put concepts on the next line e.g. + // template + // concept ... 
+  if (Left.is(TT_TemplateCloser) && Right.is(tok::kw_concept))
+    return true;
+
   if (Right.is(tok::comment))
     return Left.BlockKind != BK_BracedInit &&
            Left.isNot(TT_CtorInitializerColon) &&
Index: clang/lib/Format/UnwrappedLineParser.h
===================================================================
--- clang/lib/Format/UnwrappedLineParser.h
+++ clang/lib/Format/UnwrappedLineParser.h
@@ -113,6 +113,8 @@
   void parseNew();
   void parseAccessSpecifier();
   bool parseEnum();
+  void parseConcept();
+  void parseRequires();
   void parseJavaEnumBody();
   // Parses a record (aka class) as a top level element. If ParseAsExpr is true,
   // parses the record as a child block, i.e. if the class declaration is an
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -628,6 +628,13 @@
 
   if (MunchSemi && FormatTok->Tok.is(tok::semi))
     nextToken();
+  else if (FormatTok->is(tok::arrow)) {
+    // Following the } we can find a trailing return type arrow
+    // as part of an implicit conversion constraint.
+    nextToken();
+    parseStructuralElement();
+  }
+
   Line->Level = InitialLevel;
 
   if (PPStartHash == PPEndHash) {
@@ -1257,6 +1264,12 @@
       break;
     }
     break;
+  case tok::kw_concept:
+    parseConcept();
+    break;
+  case tok::kw_requires:
+    parseRequires();
+    break;
   case tok::kw_enum:
     // Ignore if this is part of "template <enum ...".
     if (Previous && Previous->is(tok::less)) {
@@ -2255,6 +2268,87 @@
   addUnwrappedLine();
 }
 
+// Parses "concept <identifier> = ..."; when the definition starts with the
+// 'requires' keyword it is handed over to parseRequires().
+void UnwrappedLineParser::parseConcept() {
+  assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
+  nextToken();
+  if (!FormatTok->Tok.is(tok::identifier))
+    return;
+  nextToken();
+  if (!FormatTok->Tok.is(tok::equal))
+    return;
+  nextToken();
+  if (FormatTok->Tok.is(tok::kw_requires))
+    parseRequires();
+}
+
+// Parses the tokens following a 'requires' keyword: an optional parenthesized
+// parameter list, then either a braced requirement body or a chain of
+// constraints joined by '&&' / '||' (tagged TT_ConstraintJunctions).
+void UnwrappedLineParser::parseRequires() {
+  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
+
+  unsigned int OriginalLevel = Line->Level;
+  if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
+    addUnwrappedLine();
+    if (Style.IndentRequires) {
+      Line->Level++;
+    }
+  }
+  nextToken();
+
+  // requires (R range)
+  if (FormatTok->Tok.is(tok::l_paren)) {
+    parseParens();
+    if (Style.IndentRequires && OriginalLevel != Line->Level) {
+      addUnwrappedLine();
+      --Line->Level;
+    }
+  }
+
+  if (FormatTok->Tok.is(tok::l_brace)) {
+    parseBlock(/*MustBeDeclaration=*/false);
+    addUnwrappedLine();
+  } else {
+    // requires Id<T> && Id<T> || Id<T>
+    while (FormatTok->is(tok::identifier)) {
+      nextToken();
+      if (FormatTok->Tok.is(tok::less)) {
+        // Skip the template argument list. Stop at eof too, so that an
+        // unterminated '<' in incomplete code cannot make this loop spin.
+        while (!eof() && !FormatTok->Tok.is(tok::greater)) {
+          nextToken();
+        }
+        nextToken();
+      }
+      if (FormatTok->Tok.is(tok::l_paren)) {
+        parseParens();
+      }
+      if (FormatTok->Tok.is(tok::l_brace)) {
+        parseBlock(/*MustBeDeclaration=*/false);
+      }
+      if (FormatTok->Tok.is(tok::semi)) {
+        // Eat any trailing semi.
+        nextToken();
+        addUnwrappedLine();
+      }
+      if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
+        if (FormatTok->Previous && !FormatTok->Previous->is(tok::identifier)) {
+          addUnwrappedLine();
+        }
+        if (Style.IndentRequires && OriginalLevel != Line->Level) {
+          --Line->Level;
+        }
+        break;
+      } else
+        FormatTok->Type = TT_ConstraintJunctions;
+
+      nextToken();
+    }
+  }
+}
+
 bool UnwrappedLineParser::parseEnum() {
   // Won't be 'enum' for NS_ENUMs.
   if (FormatTok->Tok.is(tok::kw_enum))
Index: clang/lib/Format/UnwrappedLineParser.cpp.new
===================================================================
--- /dev/null
+++ clang/lib/Format/UnwrappedLineParser.cpp.new
@@ -0,0 +1,3002 @@
+//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+/// +//===----------------------------------------------------------------------===// + +#include "UnwrappedLineParser.h" +#include "FormatToken.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include + +#define DEBUG_TYPE "format-parser" + +namespace clang { +namespace format { + +class FormatTokenSource { +public: + virtual ~FormatTokenSource() {} + virtual FormatToken *getNextToken() = 0; + + virtual unsigned getPosition() = 0; + virtual FormatToken *setPosition(unsigned Position) = 0; +}; + +namespace { + +class ScopedDeclarationState { +public: + ScopedDeclarationState(UnwrappedLine &Line, std::vector &Stack, + bool MustBeDeclaration) + : Line(Line), Stack(Stack) { + Line.MustBeDeclaration = MustBeDeclaration; + Stack.push_back(MustBeDeclaration); + } + ~ScopedDeclarationState() { + Stack.pop_back(); + if (!Stack.empty()) + Line.MustBeDeclaration = Stack.back(); + else + Line.MustBeDeclaration = true; + } + +private: + UnwrappedLine &Line; + std::vector &Stack; +}; + +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// \p Previous. The original column of \p MinColumnToken is used to determine +// whether \p FormatTok is indented enough to the right to continue \p Previous. +static bool continuesLineComment(const FormatToken &FormatTok, + const FormatToken *Previous, + const FormatToken *MinColumnToken) { + if (!Previous || !MinColumnToken) + return false; + unsigned MinContinueColumn = + MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*Previous) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + +class ScopedMacroState : public FormatTokenSource { +public: + ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, + FormatToken *&ResetToken) + : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), + PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), + Token(nullptr), PreviousToken(nullptr) { + FakeEOF.Tok.startToken(); + FakeEOF.Tok.setKind(tok::eof); + TokenSource = this; + Line.Level = 0; + Line.InPPDirective = true; + } + + ~ScopedMacroState() override { + TokenSource = PreviousTokenSource; + ResetToken = Token; + Line.InPPDirective = false; + Line.Level = PreviousLineLevel; + } + + FormatToken *getNextToken() override { + // The \c UnwrappedLineParser guards against this by never calling + // \c getNextToken() after it has encountered the first eof token. + assert(!eof()); + PreviousToken = Token; + Token = PreviousTokenSource->getNextToken(); + if (eof()) + return &FakeEOF; + return Token; + } + + unsigned getPosition() override { return PreviousTokenSource->getPosition(); } + + FormatToken *setPosition(unsigned Position) override { + PreviousToken = nullptr; + Token = PreviousTokenSource->setPosition(Position); + return Token; + } + +private: + bool eof() { + return Token && Token->HasUnescapedNewline && + !continuesLineComment(*Token, PreviousToken, + /*MinColumnToken=*/PreviousToken); + } + + FormatToken FakeEOF; + UnwrappedLine &Line; + FormatTokenSource *&TokenSource; + FormatToken *&ResetToken; + unsigned PreviousLineLevel; + FormatTokenSource *PreviousTokenSource; + + FormatToken *Token; + FormatToken *PreviousToken; +}; + +} // end anonymous namespace + +class ScopedLineState { +public: + ScopedLineState(UnwrappedLineParser &Parser, + bool SwitchToPreprocessorLines = false) + : Parser(Parser), OriginalLines(Parser.CurrentLines) { + if 
(SwitchToPreprocessorLines) + Parser.CurrentLines = &Parser.PreprocessorDirectives; + else if (!Parser.Line->Tokens.empty()) + Parser.CurrentLines = &Parser.Line->Tokens.back().Children; + PreBlockLine = std::move(Parser.Line); + Parser.Line = std::make_unique(); + Parser.Line->Level = PreBlockLine->Level; + Parser.Line->InPPDirective = PreBlockLine->InPPDirective; + } + + ~ScopedLineState() { + if (!Parser.Line->Tokens.empty()) { + Parser.addUnwrappedLine(); + } + assert(Parser.Line->Tokens.empty()); + Parser.Line = std::move(PreBlockLine); + if (Parser.CurrentLines == &Parser.PreprocessorDirectives) + Parser.MustBreakBeforeNextToken = true; + Parser.CurrentLines = OriginalLines; + } + +private: + UnwrappedLineParser &Parser; + + std::unique_ptr PreBlockLine; + SmallVectorImpl *OriginalLines; +}; + +class CompoundStatementIndenter { +public: + CompoundStatementIndenter(UnwrappedLineParser *Parser, + const FormatStyle &Style, unsigned &LineLevel) + : CompoundStatementIndenter(Parser, LineLevel, + Style.BraceWrapping.AfterControlStatement, + Style.BraceWrapping.IndentBraces) {} + CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, + bool WrapBrace, bool IndentBrace) + : LineLevel(LineLevel), OldLineLevel(LineLevel) { + if (WrapBrace) + Parser->addUnwrappedLine(); + if (IndentBrace) + ++LineLevel; + } + ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } + +private: + unsigned &LineLevel; + unsigned OldLineLevel; +}; + +namespace { + +class IndexedTokenSource : public FormatTokenSource { +public: + IndexedTokenSource(ArrayRef Tokens) + : Tokens(Tokens), Position(-1) {} + + FormatToken *getNextToken() override { + ++Position; + return Tokens[Position]; + } + + unsigned getPosition() override { + assert(Position >= 0); + return Position; + } + + FormatToken *setPosition(unsigned P) override { + Position = P; + return Tokens[Position]; + } + + void reset() { Position = -1; } + +private: + ArrayRef Tokens; + int Position; +}; + +} // end 
anonymous namespace + +UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, + const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, + ArrayRef Tokens, + UnwrappedLineConsumer &Callback) + : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), + CurrentLines(&Lines), Style(Style), Keywords(Keywords), + CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), + Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), + IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None + ? IG_Rejected + : IG_Inited), + IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} + +void UnwrappedLineParser::reset() { + PPBranchLevel = -1; + IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None + ? IG_Rejected + : IG_Inited; + IncludeGuardToken = nullptr; + Line.reset(new UnwrappedLine); + CommentsBeforeNextToken.clear(); + FormatTok = nullptr; + MustBreakBeforeNextToken = false; + PreprocessorDirectives.clear(); + CurrentLines = &Lines; + DeclarationScopeStack.clear(); + PPStack.clear(); + Line->FirstStartColumn = FirstStartColumn; +} + +void UnwrappedLineParser::parse() { + IndexedTokenSource TokenSource(AllTokens); + Line->FirstStartColumn = FirstStartColumn; + do { + LLVM_DEBUG(llvm::dbgs() << "----\n"); + reset(); + Tokens = &TokenSource; + TokenSource.reset(); + + readToken(); + parseFile(); + + // If we found an include guard then all preprocessor directives (other than + // the guard) are over-indented by one. + if (IncludeGuard == IG_Found) + for (auto &Line : Lines) + if (Line.InPPDirective && Line.Level > 0) + --Line.Level; + + // Create line with eof token. 
+ pushToken(FormatTok); + addUnwrappedLine(); + + for (SmallVectorImpl::iterator I = Lines.begin(), + E = Lines.end(); + I != E; ++I) { + Callback.consumeUnwrappedLine(*I); + } + Callback.finishRun(); + Lines.clear(); + while (!PPLevelBranchIndex.empty() && + PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { + PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); + PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); + } + if (!PPLevelBranchIndex.empty()) { + ++PPLevelBranchIndex.back(); + assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); + assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); + } + } while (!PPLevelBranchIndex.empty()); +} + +void UnwrappedLineParser::parseFile() { + // The top-level context in a file always has declarations, except for pre- + // processor directives and JavaScript files. + bool MustBeDeclaration = + !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + MustBeDeclaration); + if (Style.Language == FormatStyle::LK_TextProto) + parseBracedList(); + else + parseLevel(/*HasOpeningBrace=*/false); + // Make sure to format the remaining tokens. + // + // LK_TextProto is special since its top-level is parsed as the body of a + // braced list, which does not necessarily have natural line separators such + // as a semicolon. Comments after the last entry that have been determined to + // not belong to that line, as in: + // key: value + // // endfile comment + // do not have a chance to be put on a line of their own until this point. + // Here we add this newline before end-of-file comments. 
+ if (Style.Language == FormatStyle::LK_TextProto && + !CommentsBeforeNextToken.empty()) + addUnwrappedLine(); + flushComments(true); + addUnwrappedLine(); +} + +void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_brace: + return; + default: + if (FormatTok->is(Keywords.kw_where)) { + addUnwrappedLine(); + nextToken(); + parseCSharpGenericTypeConstraint(); + break; + } + nextToken(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseCSharpAttribute() { + int UnpairedSquareBrackets = 1; + do { + switch (FormatTok->Tok.getKind()) { + case tok::r_square: + nextToken(); + --UnpairedSquareBrackets; + if (UnpairedSquareBrackets == 0) { + addUnwrappedLine(); + return; + } + break; + case tok::l_square: + ++UnpairedSquareBrackets; + nextToken(); + break; + default: + nextToken(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { + bool SwitchLabelEncountered = false; + do { + tok::TokenKind kind = FormatTok->Tok.getKind(); + if (FormatTok->Type == TT_MacroBlockBegin) { + kind = tok::l_brace; + } else if (FormatTok->Type == TT_MacroBlockEnd) { + kind = tok::r_brace; + } + + switch (kind) { + case tok::comment: + nextToken(); + addUnwrappedLine(); + break; + case tok::l_brace: + // FIXME: Add parameter whether this can happen - if this happens, we must + // be in a non-declaration context. 
+ if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) + continue; + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + break; + case tok::r_brace: + if (HasOpeningBrace) + return; + nextToken(); + addUnwrappedLine(); + break; + case tok::kw_default: { + unsigned StoredPosition = Tokens->getPosition(); + FormatToken *Next; + do { + Next = Tokens->getNextToken(); + } while (Next && Next->is(tok::comment)); + FormatTok = Tokens->setPosition(StoredPosition); + if (Next && Next->isNot(tok::colon)) { + // default not followed by ':' is not a case label; treat it like + // an identifier. + parseStructuralElement(); + break; + } + // Else, if it is 'default:', fall through to the case handling. + LLVM_FALLTHROUGH; + } + case tok::kw_case: + if (Style.Language == FormatStyle::LK_JavaScript && + Line->MustBeDeclaration) { + // A 'case: string' style field declaration. + parseStructuralElement(); + break; + } + if (!SwitchLabelEncountered && + (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) + ++Line->Level; + SwitchLabelEncountered = true; + parseStructuralElement(); + break; + case tok::l_square: + if (Style.isCSharp()) { + nextToken(); + parseCSharpAttribute(); + break; + } + LLVM_FALLTHROUGH; + default: + parseStructuralElement(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { + // We'll parse forward through the tokens until we hit + // a closing brace or eof - note that getNextToken() will + // parse macros, so this will magically work inside macro + // definitions, too. + unsigned StoredPosition = Tokens->getPosition(); + FormatToken *Tok = FormatTok; + const FormatToken *PrevTok = Tok->Previous; + // Keep a stack of positions of lbrace tokens. We will + // update information about whether an lbrace starts a + // braced init list or a different block during the loop. 
+ SmallVector LBraceStack; + assert(Tok->Tok.is(tok::l_brace)); + do { + // Get next non-comment token. + FormatToken *NextTok; + unsigned ReadTokens = 0; + do { + NextTok = Tokens->getNextToken(); + ++ReadTokens; + } while (NextTok->is(tok::comment)); + + switch (Tok->Tok.getKind()) { + case tok::l_brace: + if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { + if (PrevTok->isOneOf(tok::colon, tok::less)) + // A ':' indicates this code is in a type, or a braced list + // following a label in an object literal ({a: {b: 1}}). + // A '<' could be an object used in a comparison, but that is nonsense + // code (can never return true), so more likely it is a generic type + // argument (`X<{a: string; b: number}>`). + // The code below could be confused by semicolons between the + // individual members in a type member list, which would normally + // trigger BK_Block. In both cases, this must be parsed as an inline + // braced init. + Tok->BlockKind = BK_BracedInit; + else if (PrevTok->is(tok::r_paren)) + // `) { }` can only occur in function or method declarations in JS. + Tok->BlockKind = BK_Block; + } else { + Tok->BlockKind = BK_Unknown; + } + LBraceStack.push_back(Tok); + break; + case tok::r_brace: + if (LBraceStack.empty()) + break; + if (LBraceStack.back()->BlockKind == BK_Unknown) { + bool ProbablyBracedList = false; + if (Style.Language == FormatStyle::LK_Proto) { + ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); + } else { + // Using OriginalColumn to distinguish between ObjC methods and + // binary operators is a bit hacky. + bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && + NextTok->OriginalColumn == 0; + + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. 
Note that + // regardless how we mark inner braces here, we will overwrite the + // BlockKind later if we parse a braced list (where all blocks + // inside are by default braced lists), or when we explicitly detect + // blocks (for example while parsing lambdas). + // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a + // braced list in JS. + ProbablyBracedList = + (Style.Language == FormatStyle::LK_JavaScript && + NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, + Keywords.kw_as)) || + (Style.isCpp() && NextTok->is(tok::l_paren)) || + NextTok->isOneOf(tok::comma, tok::period, tok::colon, + tok::r_paren, tok::r_square, tok::l_brace, + tok::ellipsis) || + (NextTok->is(tok::identifier) && + !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || + (NextTok->is(tok::semi) && + (!ExpectClassBody || LBraceStack.size() != 1)) || + (NextTok->isBinaryOperator() && !NextIsObjCMethod); + if (!Style.isCSharp() && NextTok->is(tok::l_square)) { + // We can have an array subscript after a braced init + // list, but C++11 attributes are expected after blocks. + NextTok = Tokens->getNextToken(); + ++ReadTokens; + ProbablyBracedList = NextTok->isNot(tok::l_square); + } + } + if (ProbablyBracedList) { + Tok->BlockKind = BK_BracedInit; + LBraceStack.back()->BlockKind = BK_BracedInit; + } else { + Tok->BlockKind = BK_Block; + LBraceStack.back()->BlockKind = BK_Block; + } + } + LBraceStack.pop_back(); + break; + case tok::identifier: + if (!Tok->is(TT_StatementMacro)) + break; + LLVM_FALLTHROUGH; + case tok::at: + case tok::semi: + case tok::kw_if: + case tok::kw_while: + case tok::kw_for: + case tok::kw_switch: + case tok::kw_try: + case tok::kw___try: + if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) + LBraceStack.back()->BlockKind = BK_Block; + break; + default: + break; + } + PrevTok = Tok; + Tok = NextTok; + } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); + + // Assume other blocks for all unclosed opening braces. 
+ for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { + if (LBraceStack[i]->BlockKind == BK_Unknown) + LBraceStack[i]->BlockKind = BK_Block; + } + + FormatTok = Tokens->setPosition(StoredPosition); +} + +template +static inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +size_t UnwrappedLineParser::computePPHash() const { + size_t h = 0; + for (const auto &i : PPStack) { + hash_combine(h, size_t(i.Kind)); + hash_combine(h, i.Line); + } + return h; +} + +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, + bool MunchSemi) { + assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && + "'{' or macro block token expected"); + const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); + FormatTok->BlockKind = BK_Block; + + size_t PPStartHash = computePPHash(); + + unsigned InitialLevel = Line->Level; + nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); + + if (MacroBlock && FormatTok->is(tok::l_paren)) + parseParens(); + + size_t NbPreprocessorDirectives = + CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; + addUnwrappedLine(); + size_t OpeningLineIndex = + CurrentLines->empty() + ? (UnwrappedLine::kInvalidIndex) + : (CurrentLines->size() - 1 - NbPreprocessorDirectives); + + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + MustBeDeclaration); + if (AddLevel) + ++Line->Level; + parseLevel(/*HasOpeningBrace=*/true); + + if (eof()) + return; + + if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) + : !FormatTok->is(tok::r_brace)) { + Line->Level = InitialLevel; + FormatTok->BlockKind = BK_Block; + return; + } + + size_t PPEndHash = computePPHash(); + + // Munch the closing brace. + nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); + + if (MacroBlock && FormatTok->is(tok::l_paren)) + parseParens(); + + if (MunchSemi && FormatTok->Tok.is(tok::semi)) + nextToken(); + Line->Level = InitialLevel; + + if (PPStartHash == PPEndHash) { + Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; + if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { + // Update the opening line to add the forward reference as well + (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = + CurrentLines->size() - 1; + } + } +} + +static bool isGoogScope(const UnwrappedLine &Line) { + // FIXME: Closure-library specific stuff should not be hard-coded but be + // configurable. + if (Line.Tokens.size() < 4) + return false; + auto I = Line.Tokens.begin(); + if (I->Tok->TokenText != "goog") + return false; + ++I; + if (I->Tok->isNot(tok::period)) + return false; + ++I; + if (I->Tok->TokenText != "scope") + return false; + ++I; + return I->Tok->is(tok::l_paren); +} + +static bool isIIFE(const UnwrappedLine &Line, + const AdditionalKeywords &Keywords) { + // Look for the start of an immediately invoked anonymous function. + // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression + // This is commonly done in JavaScript to create a new, anonymous scope. + // Example: (function() { ... 
})() + if (Line.Tokens.size() < 3) + return false; + auto I = Line.Tokens.begin(); + if (I->Tok->isNot(tok::l_paren)) + return false; + ++I; + if (I->Tok->isNot(Keywords.kw_function)) + return false; + ++I; + return I->Tok->is(tok::l_paren); +} + +static bool ShouldBreakBeforeBrace(const FormatStyle &Style, + const FormatToken &InitialToken) { + if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) + return Style.BraceWrapping.AfterNamespace; + if (InitialToken.is(tok::kw_class)) + return Style.BraceWrapping.AfterClass; + if (InitialToken.is(tok::kw_union)) + return Style.BraceWrapping.AfterUnion; + if (InitialToken.is(tok::kw_struct)) + return Style.BraceWrapping.AfterStruct; + return false; +} + +void UnwrappedLineParser::parseChildBlock() { + FormatTok->BlockKind = BK_Block; + nextToken(); + { + bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && + (isGoogScope(*Line) || isIIFE(*Line, Keywords))); + ScopedLineState LineState(*this); + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + /*MustBeDeclaration=*/false); + Line->Level += SkipIndent ? 0 : 1; + parseLevel(/*HasOpeningBrace=*/true); + flushComments(isOnNewLine(*FormatTok)); + Line->Level -= SkipIndent ? 
0 : 1; + } + nextToken(); +} + +void UnwrappedLineParser::parsePPDirective() { + assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); + ScopedMacroState MacroState(*Line, Tokens, FormatTok); + + nextToken(); + + if (!FormatTok->Tok.getIdentifierInfo()) { + parsePPUnknown(); + return; + } + + switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { + case tok::pp_define: + parsePPDefine(); + return; + case tok::pp_if: + parsePPIf(/*IfDef=*/false); + break; + case tok::pp_ifdef: + case tok::pp_ifndef: + parsePPIf(/*IfDef=*/true); + break; + case tok::pp_else: + parsePPElse(); + break; + case tok::pp_elif: + parsePPElIf(); + break; + case tok::pp_endif: + parsePPEndIf(); + break; + default: + parsePPUnknown(); + break; + } +} + +void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { + size_t Line = CurrentLines->size(); + if (CurrentLines == &PreprocessorDirectives) + Line += Lines.size(); + + if (Unreachable || + (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) + PPStack.push_back({PP_Unreachable, Line}); + else + PPStack.push_back({PP_Conditional, Line}); +} + +void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { + ++PPBranchLevel; + assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); + if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { + PPLevelBranchIndex.push_back(0); + PPLevelBranchCount.push_back(0); + } + PPChainBranchIndex.push(0); + bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; + conditionalCompilationCondition(Unreachable || Skip); +} + +void UnwrappedLineParser::conditionalCompilationAlternative() { + if (!PPStack.empty()) + PPStack.pop_back(); + assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); + if (!PPChainBranchIndex.empty()) + ++PPChainBranchIndex.top(); + conditionalCompilationCondition( + PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && + PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); +} + +void 
UnwrappedLineParser::conditionalCompilationEnd() { + assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); + if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { + if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { + PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; + } + } + // Guard against #endif's without #if. + if (PPBranchLevel > -1) + --PPBranchLevel; + if (!PPChainBranchIndex.empty()) + PPChainBranchIndex.pop(); + if (!PPStack.empty()) + PPStack.pop_back(); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { + bool IfNDef = FormatTok->is(tok::pp_ifndef); + nextToken(); + bool Unreachable = false; + if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) + Unreachable = true; + if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") + Unreachable = true; + conditionalCompilationStart(Unreachable); + FormatToken *IfCondition = FormatTok; + // If there's a #ifndef on the first line, and the only lines before it are + // comments, it could be an include guard. + bool MaybeIncludeGuard = IfNDef; + if (IncludeGuard == IG_Inited && MaybeIncludeGuard) + for (auto &Line : Lines) { + if (!Line.Tokens.front().Tok->is(tok::comment)) { + MaybeIncludeGuard = false; + IncludeGuard = IG_Rejected; + break; + } + } + --PPBranchLevel; + parsePPUnknown(); + ++PPBranchLevel; + if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { + IncludeGuard = IG_IfNdefed; + IncludeGuardToken = IfCondition; + } +} + +void UnwrappedLineParser::parsePPElse() { + // If a potential include guard has an #else, it's not an include guard. 
+ if (IncludeGuard == IG_Defined && PPBranchLevel == 0) + IncludeGuard = IG_Rejected; + conditionalCompilationAlternative(); + if (PPBranchLevel > -1) + --PPBranchLevel; + parsePPUnknown(); + ++PPBranchLevel; +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { + conditionalCompilationEnd(); + parsePPUnknown(); + // If the #endif of a potential include guard is the last thing in the file, + // then we found an include guard. + unsigned TokenPosition = Tokens->getPosition(); + FormatToken *PeekNext = AllTokens[TokenPosition]; + if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && + PeekNext->is(tok::eof) && + Style.IndentPPDirectives != FormatStyle::PPDIS_None) + IncludeGuard = IG_Found; +} + +void UnwrappedLineParser::parsePPDefine() { + nextToken(); + + if (!FormatTok->Tok.getIdentifierInfo()) { + IncludeGuard = IG_Rejected; + IncludeGuardToken = nullptr; + parsePPUnknown(); + return; + } + + if (IncludeGuard == IG_IfNdefed && + IncludeGuardToken->TokenText == FormatTok->TokenText) { + IncludeGuard = IG_Defined; + IncludeGuardToken = nullptr; + for (auto &Line : Lines) { + if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { + IncludeGuard = IG_Rejected; + break; + } + } + } + + nextToken(); + if (FormatTok->Tok.getKind() == tok::l_paren && + FormatTok->WhitespaceRange.getBegin() == + FormatTok->WhitespaceRange.getEnd()) { + parseParens(); + } + if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) + Line->Level += PPBranchLevel + 1; + addUnwrappedLine(); + ++Line->Level; + + // Errors during a preprocessor directive can only affect the layout of the + // preprocessor directive, and thus we ignore them. An alternative approach + // would be to use the same approach we use on the file level (no + // re-indentation if there was a structural error) within the macro + // definition. 
+ parseFile(); +} + +void UnwrappedLineParser::parsePPUnknown() { + do { + nextToken(); + } while (!eof()); + if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) + Line->Level += PPBranchLevel + 1; + addUnwrappedLine(); +} + +// Here we blacklist certain tokens that are not usually the first token in an +// unwrapped line. This is used in attempt to distinguish macro calls without +// trailing semicolons from other constructs split to several lines. +static bool tokenCanStartNewLine(const clang::Token &Tok) { + // Semicolon can be a null-statement, l_square can be a start of a macro or + // a C++11 attribute, but this doesn't seem to be common. + return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && + Tok.isNot(tok::l_square) && + // Tokens that can only be used as binary operators and a part of + // overloaded operator names. + Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && + Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && + Tok.isNot(tok::less) && Tok.isNot(tok::greater) && + Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && + Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && + Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && + Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && + Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && + Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && + Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && + Tok.isNot(tok::lesslessequal) && + // Colon is used in labels, base class lists, initializer lists, + // range-based for loops, ternary operator, but should never be the + // first token in an unwrapped line. + Tok.isNot(tok::colon) && + // 'noexcept' is a trailing annotation. + Tok.isNot(tok::kw_noexcept); +} + +static bool mustBeJSIdent(const AdditionalKeywords &Keywords, + const FormatToken *FormatTok) { + // FIXME: This returns true for C/C++ keywords like 'struct'. 
+ return FormatTok->is(tok::identifier) && + (FormatTok->Tok.getIdentifierInfo() == nullptr || + !FormatTok->isOneOf( + Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, + Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, + Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, + Keywords.kw_let, Keywords.kw_var, tok::kw_const, + Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, + Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, + Keywords.kw_from)); +} + +static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, + const FormatToken *FormatTok) { + return FormatTok->Tok.isLiteral() || + FormatTok->isOneOf(tok::kw_true, tok::kw_false) || + mustBeJSIdent(Keywords, FormatTok); +} + +// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement +// when encountered after a value (see mustBeJSIdentOrValue). +static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, + const FormatToken *FormatTok) { + return FormatTok->isOneOf( + tok::kw_return, Keywords.kw_yield, + // conditionals + tok::kw_if, tok::kw_else, + // loops + tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, + // switch/case + tok::kw_switch, tok::kw_case, + // exceptions + tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, + // declaration + tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, + Keywords.kw_async, Keywords.kw_function, + // import/export + Keywords.kw_import, tok::kw_export); +} + +// readTokenWithJavaScriptASI reads the next token and terminates the current +// line if JavaScript Automatic Semicolon Insertion must +// happen between the current token and the next token. +// +// This method is conservative - it cannot cover all edge cases of JavaScript, +// but only aims to correctly handle certain well known cases. It *must not* +// return true in speculative cases. 
+void UnwrappedLineParser::readTokenWithJavaScriptASI() { + FormatToken *Previous = FormatTok; + readToken(); + FormatToken *Next = FormatTok; + + bool IsOnSameLine = + CommentsBeforeNextToken.empty() + ? Next->NewlinesBefore == 0 + : CommentsBeforeNextToken.front()->NewlinesBefore == 0; + if (IsOnSameLine) + return; + + bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); + bool PreviousStartsTemplateExpr = + Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); + if (PreviousMustBeValue || Previous->is(tok::r_paren)) { + // If the line contains an '@' sign, the previous token might be an + // annotation, which can precede another identifier/value. + bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), + [](UnwrappedLineNode &LineNode) { + return LineNode.Tok->is(tok::at); + }) != Line->Tokens.end(); + if (HasAt) + return; + } + if (Next->is(tok::exclaim) && PreviousMustBeValue) + return addUnwrappedLine(); + bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); + bool NextEndsTemplateExpr = + Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); + if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && + (PreviousMustBeValue || + Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, + tok::minusminus))) + return addUnwrappedLine(); + if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && + isJSDeclOrStmt(Keywords, Next)) + return addUnwrappedLine(); +} + +void UnwrappedLineParser::parseStructuralElement() { + assert(!FormatTok->is(tok::l_brace)); + if (Style.Language == FormatStyle::LK_TableGen && + FormatTok->is(tok::pp_include)) { + nextToken(); + if (FormatTok->is(tok::string_literal)) + nextToken(); + addUnwrappedLine(); + return; + } + switch (FormatTok->Tok.getKind()) { + case tok::kw_asm: + nextToken(); + if (FormatTok->is(tok::l_brace)) { + FormatTok->Type = TT_InlineASMBrace; + nextToken(); + while (FormatTok && FormatTok->isNot(tok::eof)) { + if 
(FormatTok->is(tok::r_brace)) { + FormatTok->Type = TT_InlineASMBrace; + nextToken(); + addUnwrappedLine(); + break; + } + FormatTok->Finalized = true; + nextToken(); + } + } + break; + case tok::kw_namespace: + parseNamespace(); + return; + case tok::kw_public: + case tok::kw_protected: + case tok::kw_private: + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) + nextToken(); + else + parseAccessSpecifier(); + return; + case tok::kw_if: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; + parseIfThenElse(); + return; + case tok::kw_for: + case tok::kw_while: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; + parseForOrWhileLoop(); + return; + case tok::kw_do: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; + parseDoWhile(); + return; + case tok::kw_switch: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'switch: string' field declaration. + break; + parseSwitch(); + return; + case tok::kw_default: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'default: string' field declaration. + break; + nextToken(); + if (FormatTok->is(tok::colon)) { + parseLabel(); + return; + } + // e.g. "default void f() {}" in a Java interface. + break; + case tok::kw_case: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'case: string' field declaration. + break; + parseCaseLabel(); + return; + case tok::kw_try: + case tok::kw___try: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. 
+ break; + parseTryCatch(); + return; + case tok::kw_extern: + nextToken(); + if (FormatTok->Tok.is(tok::string_literal)) { + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterExternBlock) { + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/true); + } else { + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); + } + addUnwrappedLine(); + return; + } + } + break; + case tok::kw_export: + if (Style.Language == FormatStyle::LK_JavaScript) { + parseJavaScriptEs6ImportExport(); + return; + } + if (!Style.isCpp()) + break; + // Handle C++ "(inline|export) namespace". + LLVM_FALLTHROUGH; + case tok::kw_inline: + nextToken(); + if (FormatTok->Tok.is(tok::kw_namespace)) { + parseNamespace(); + return; + } + break; + case tok::identifier: + if (FormatTok->is(TT_ForEachMacro)) { + parseForOrWhileLoop(); + return; + } + if (FormatTok->is(TT_MacroBlockBegin)) { + parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, + /*MunchSemi=*/false); + return; + } + if (FormatTok->is(Keywords.kw_import)) { + if (Style.Language == FormatStyle::LK_JavaScript) { + parseJavaScriptEs6ImportExport(); + return; + } + if (Style.Language == FormatStyle::LK_Proto) { + nextToken(); + if (FormatTok->is(tok::kw_public)) + nextToken(); + if (!FormatTok->is(tok::string_literal)) + return; + nextToken(); + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + return; + } + } + if (Style.isCpp() && + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, + Keywords.kw_slots, Keywords.kw_qslots)) { + nextToken(); + if (FormatTok->is(tok::colon)) { + nextToken(); + addUnwrappedLine(); + return; + } + } + if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { + parseStatementMacro(); + return; + } + if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { + parseNamespace(); + return; + } + // In all other cases, parse the declaration. 
+ break; + default: + break; + } + do { + const FormatToken *Previous = FormatTok->Previous; + switch (FormatTok->Tok.getKind()) { + case tok::at: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); + parseBracedList(); + break; + } else if (Style.Language == FormatStyle::LK_Java && + FormatTok->is(Keywords.kw_interface)) { + nextToken(); + break; + } + switch (FormatTok->Tok.getObjCKeywordID()) { + case tok::objc_public: + case tok::objc_protected: + case tok::objc_package: + case tok::objc_private: + return parseAccessSpecifier(); + case tok::objc_interface: + case tok::objc_implementation: + return parseObjCInterfaceOrImplementation(); + case tok::objc_protocol: + if (parseObjCProtocol()) + return; + break; + case tok::objc_end: + return; // Handled by the caller. + case tok::objc_optional: + case tok::objc_required: + nextToken(); + addUnwrappedLine(); + return; + case tok::objc_autoreleasepool: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_Always) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + } + addUnwrappedLine(); + return; + case tok::objc_synchronized: + nextToken(); + if (FormatTok->Tok.is(tok::l_paren)) + // Skip synchronization object + parseParens(); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_Always) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + } + addUnwrappedLine(); + return; + case tok::objc_try: + // This branch isn't strictly necessary (the kw_try case below would + // do this too after the tok::at is parsed above). But be explicit. + parseTryCatch(); + return; + default: + break; + } + break; + case tok::kw_enum: + // Ignore if this is part of "template is(tok::less)) { + nextToken(); + break; + } + + // parseEnum falls through and does not yet add an unwrapped line as an + // enum definition can start a structural element. 
+ if (!parseEnum()) + break; + // This only applies for C++. + if (!Style.isCpp()) { + addUnwrappedLine(); + return; + } + break; + case tok::kw_typedef: + nextToken(); + if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, + Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, + Keywords.kw_CF_CLOSED_ENUM, + Keywords.kw_NS_CLOSED_ENUM)) + parseEnum(); + break; + case tok::kw_struct: + case tok::kw_union: + case tok::kw_class: + // parseRecord falls through and does not yet add an unwrapped line as a + // record declaration or definition can start a structural element. + parseRecord(); + // This does not apply for Java, JavaScript and C#. + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + return; + } + break; + case tok::period: + nextToken(); + // In Java, classes have an implicit static member "class". + if (Style.Language == FormatStyle::LK_Java && FormatTok && + FormatTok->is(tok::kw_class)) + nextToken(); + if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && + FormatTok->Tok.getIdentifierInfo()) + // JavaScript only has pseudo keywords, all keywords are allowed to + // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 + nextToken(); + break; + case tok::semi: + nextToken(); + addUnwrappedLine(); + return; + case tok::r_brace: + addUnwrappedLine(); + return; + case tok::l_paren: + parseParens(); + break; + case tok::kw_operator: + nextToken(); + if (FormatTok->isBinaryOperator()) + nextToken(); + break; + case tok::caret: + nextToken(); + if (FormatTok->Tok.isAnyIdentifier() || + FormatTok->isSimpleTypeSpecifier()) + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + break; + case tok::l_brace: + if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { + // A block outside of parentheses must be the last part of a + // structural element. + // FIXME: Figure out cases where this is not true, and add projections + // for them (the one we know is missing are lambdas). + if (Style.BraceWrapping.AfterFunction) + addUnwrappedLine(); + FormatTok->Type = TT_FunctionLBrace; + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + return; + } + // Otherwise this was a braced init list, and the structural + // element continues. + break; + case tok::kw_try: + if (Style.Language == FormatStyle::LK_JavaScript && + Line->MustBeDeclaration) { + // field/method declaration. + nextToken(); + break; + } + // We arrive here when parsing function-try blocks. + if (Style.BraceWrapping.AfterFunction) + addUnwrappedLine(); + parseTryCatch(); + return; + case tok::identifier: { + if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && + Line->MustBeDeclaration) { + addUnwrappedLine(); + parseCSharpGenericTypeConstraint(); + break; + } + if (FormatTok->is(TT_MacroBlockEnd)) { + addUnwrappedLine(); + return; + } + + // Function declarations (as opposed to function expressions) are parsed + // on their own unwrapped line by continuing this loop. 
Function + // expressions (functions that are not on their own line) must not create + // a new unwrapped line, so they are special cased below. + size_t TokenCount = Line->Tokens.size(); + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_function) && + (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( + Keywords.kw_async)))) { + tryToParseJSFunction(); + break; + } + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(Keywords.kw_interface)) { + if (Style.Language == FormatStyle::LK_JavaScript) { + // In JavaScript/TypeScript, "interface" can be used as a standalone + // identifier, e.g. in `var interface = 1;`. If "interface" is + // followed by another identifier, it is very like to be an actual + // interface declaration. + unsigned StoredPosition = Tokens->getPosition(); + FormatToken *Next = Tokens->getNextToken(); + FormatTok = Tokens->setPosition(StoredPosition); + if (Next && !mustBeJSIdent(Keywords, Next)) { + nextToken(); + break; + } + } + parseRecord(); + addUnwrappedLine(); + return; + } + + if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { + parseStatementMacro(); + return; + } + + // See if the following token should start a new unwrapped line. + StringRef Text = FormatTok->TokenText; + nextToken(); + + // JS doesn't have macros, and within classes colons indicate fields, not + // labels. + if (Style.Language == FormatStyle::LK_JavaScript) + break; + + TokenCount = Line->Tokens.size(); + if (TokenCount == 1 || + (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { + if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { + Line->Tokens.begin()->Tok->MustBreakBefore = true; + parseLabel(!Style.IndentGotoLabels); + return; + } + // Recognize function-like macro usages without trailing semicolon as + // well as free-standing macros like Q_OBJECT. 
+ bool FunctionLike = FormatTok->is(tok::l_paren); + if (FunctionLike) + parseParens(); + + bool FollowedByNewline = + CommentsBeforeNextToken.empty() + ? FormatTok->NewlinesBefore > 0 + : CommentsBeforeNextToken.front()->NewlinesBefore > 0; + + if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && + tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { + addUnwrappedLine(); + return; + } + } + break; + } + case tok::equal: + // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType + // TT_JsFatArrow. The always start an expression or a child block if + // followed by a curly. + if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + break; + } + + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + // Block kind should probably be set to BK_BracedInit for any language. + // C# needs this change to ensure that array initialisers and object + // initialisers are indented the same way. + if (Style.isCSharp()) + FormatTok->BlockKind = BK_BracedInit; + nextToken(); + parseBracedList(); + } else if (Style.Language == FormatStyle::LK_Proto && + FormatTok->Tok.is(tok::less)) { + nextToken(); + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, + /*ClosingBraceKind=*/tok::greater); + } + break; + case tok::l_square: + parseSquare(); + break; + case tok::kw_new: + parseNew(); + break; + default: + nextToken(); + break; + } + } while (!eof()); +} + +bool UnwrappedLineParser::tryToParsePropertyAccessor() { + assert(FormatTok->is(tok::l_brace)); + if (!Style.isCSharp()) + return false; + // See if it's a property accessor. + if (FormatTok->Previous->isNot(tok::identifier)) + return false; + + // See if we are inside a property accessor. + // + // Record the current tokenPosition so that we can advance and + // reset the current token. `Next` is not set yet so we need + // another way to advance along the token stream. 
+ unsigned int StoredPosition = Tokens->getPosition(); + FormatToken *Tok = Tokens->getNextToken(); + + // A trivial property accessor is of the form: + // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } + // Track these as they do not require line breaks to be introduced. + bool HasGetOrSet = false; + bool IsTrivialPropertyAccessor = true; + while (!eof()) { + if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, + tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, + Keywords.kw_set)) { + if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) + HasGetOrSet = true; + Tok = Tokens->getNextToken(); + continue; + } + if (Tok->isNot(tok::r_brace)) + IsTrivialPropertyAccessor = false; + break; + } + + if (!HasGetOrSet) { + Tokens->setPosition(StoredPosition); + return false; + } + + // Try to parse the property accessor: + // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties + Tokens->setPosition(StoredPosition); + nextToken(); + do { + switch (FormatTok->Tok.getKind()) { + case tok::r_brace: + nextToken(); + if (FormatTok->is(tok::equal)) { + while (!eof() && FormatTok->isNot(tok::semi)) + nextToken(); + nextToken(); + } + addUnwrappedLine(); + return true; + case tok::l_brace: + ++Line->Level; + parseBlock(/*MustBeDeclaration=*/true); + addUnwrappedLine(); + --Line->Level; + break; + case tok::equal: + if (FormatTok->is(TT_JsFatArrow)) { + ++Line->Level; + do { + nextToken(); + } while (!eof() && FormatTok->isNot(tok::semi)); + nextToken(); + addUnwrappedLine(); + --Line->Level; + break; + } + nextToken(); + break; + default: + if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && + !IsTrivialPropertyAccessor) { + // Non-trivial get/set needs to be on its own line. + addUnwrappedLine(); + } + nextToken(); + } + } while (!eof()); + + // Unreachable for well-formed code (paired '{' and '}'). 
+ return true; +} + +bool UnwrappedLineParser::tryToParseLambda() { + if (!Style.isCpp()) { + nextToken(); + return false; + } + assert(FormatTok->is(tok::l_square)); + FormatToken &LSquare = *FormatTok; + if (!tryToParseLambdaIntroducer()) + return false; + + bool SeenArrow = false; + + while (FormatTok->isNot(tok::l_brace)) { + if (FormatTok->isSimpleTypeSpecifier()) { + nextToken(); + continue; + } + switch (FormatTok->Tok.getKind()) { + case tok::l_brace: + break; + case tok::l_paren: + parseParens(); + break; + case tok::l_square: + parseSquare(); + nextToken(); + break; + case tok::amp: + case tok::star: + case tok::kw_const: + case tok::comma: + case tok::less: + case tok::greater: + case tok::identifier: + case tok::numeric_constant: + case tok::coloncolon: + case tok::kw_class: + case tok::kw_mutable: + case tok::kw_noexcept: + case tok::kw_template: + case tok::kw_typename: + nextToken(); + break; + // Specialization of a template with an integer parameter can contain + // arithmetic, logical, comparison and ternary operators. + // + // FIXME: This also accepts sequences of operators that are not in the scope + // of a template argument list. + // + // In a C++ lambda a template type can only occur after an arrow. We use + // this as an heuristic to distinguish between Objective-C expressions + // followed by an `a->b` expression, such as: + // ([obj func:arg] + a->b) + // Otherwise the code below would parse as a lambda. 
+ // + // FIXME: This heuristic is incorrect for C++20 generic lambdas with + // explicit template lists: [](U &&u){} + case tok::plus: + case tok::minus: + case tok::exclaim: + case tok::tilde: + case tok::slash: + case tok::percent: + case tok::lessless: + case tok::pipe: + case tok::pipepipe: + case tok::ampamp: + case tok::caret: + case tok::equalequal: + case tok::exclaimequal: + case tok::greaterequal: + case tok::lessequal: + case tok::question: + case tok::colon: + case tok::ellipsis: + case tok::kw_true: + case tok::kw_false: + if (SeenArrow) { + nextToken(); + break; + } + return true; + case tok::arrow: + // This might or might not actually be a lambda arrow (this could be an + // ObjC method invocation followed by a dereferencing arrow). We might + // reset this back to TT_Unknown in TokenAnnotator. + FormatTok->Type = TT_LambdaArrow; + SeenArrow = true; + nextToken(); + break; + default: + return true; + } + } + FormatTok->Type = TT_LambdaLBrace; + LSquare.Type = TT_LambdaLSquare; + parseChildBlock(); + return true; +} + +bool UnwrappedLineParser::tryToParseLambdaIntroducer() { + const FormatToken *Previous = FormatTok->Previous; + if (Previous && + (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, + tok::kw_delete, tok::l_square) || + FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || + Previous->isSimpleTypeSpecifier())) { + nextToken(); + return false; + } + nextToken(); + if (FormatTok->is(tok::l_square)) { + return false; + } + parseSquare(/*LambdaIntroducer=*/true); + return true; +} + +void UnwrappedLineParser::tryToParseJSFunction() { + assert(FormatTok->is(Keywords.kw_function) || + FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); + if (FormatTok->is(Keywords.kw_async)) + nextToken(); + // Consume "function". + nextToken(); + + // Consume * (generator function). Treat it like C++'s overloaded operators. 
+ if (FormatTok->is(tok::star)) { + FormatTok->Type = TT_OverloadedOperator; + nextToken(); + } + + // Consume function name. + if (FormatTok->is(tok::identifier)) + nextToken(); + + if (FormatTok->isNot(tok::l_paren)) + return; + + // Parse formal parameter list. + parseParens(); + + if (FormatTok->is(tok::colon)) { + // Parse a type definition. + nextToken(); + + // Eat the type declaration. For braced inline object types, balance braces, + // otherwise just parse until finding an l_brace for the function body. + if (FormatTok->is(tok::l_brace)) + tryToParseBracedList(); + else + while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) + nextToken(); + } + + if (FormatTok->is(tok::semi)) + return; + + parseChildBlock(); +} + +bool UnwrappedLineParser::tryToParseBracedList() { + if (FormatTok->BlockKind == BK_Unknown) + calculateBraceTypes(); + assert(FormatTok->BlockKind != BK_Unknown); + if (FormatTok->BlockKind == BK_Block) + return false; + nextToken(); + parseBracedList(); + return true; +} + +bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, + bool IsEnum, + tok::TokenKind ClosingBraceKind) { + bool HasError = false; + + // FIXME: Once we have an expression parser in the UnwrappedLineParser, + // replace this by using parseAssigmentExpression() inside. + do { + if (Style.isCSharp()) { + if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + // Fat arrows can be followed by simple expressions or by child blocks + // in curly braces. + if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + continue; + } + } + } + if (Style.Language == FormatStyle::LK_JavaScript) { + if (FormatTok->is(Keywords.kw_function) || + FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { + tryToParseJSFunction(); + continue; + } + if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + // Fat arrows can be followed by simple expressions or by child blocks + // in curly braces. 
+ if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + continue; + } + } + if (FormatTok->is(tok::l_brace)) { + // Could be a method inside of a braced list `{a() { return 1; }}`. + if (tryToParseBracedList()) + continue; + parseChildBlock(); + } + } + if (FormatTok->Tok.getKind() == ClosingBraceKind) { + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); + nextToken(); + return !HasError; + } + switch (FormatTok->Tok.getKind()) { + case tok::caret: + nextToken(); + if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + } + break; + case tok::l_square: + if (Style.isCSharp()) + parseSquare(); + else + tryToParseLambda(); + break; + case tok::l_paren: + parseParens(); + // JavaScript can just have free standing methods and getters/setters in + // object literals. Detect them by a "{" following ")". + if (Style.Language == FormatStyle::LK_JavaScript) { + if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + break; + } + break; + case tok::l_brace: + // Assume there are no blocks inside a braced init list apart + // from the ones we explicitly parse out (like lambdas). + FormatTok->BlockKind = BK_BracedInit; + nextToken(); + parseBracedList(); + break; + case tok::less: + if (Style.Language == FormatStyle::LK_Proto) { + nextToken(); + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, + /*ClosingBraceKind=*/tok::greater); + } else { + nextToken(); + } + break; + case tok::semi: + // JavaScript (or more precisely TypeScript) can have semicolons in braced + // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be + // used for error recovery if we have otherwise determined that this is + // a braced list. 
+ if (Style.Language == FormatStyle::LK_JavaScript) { + nextToken(); + break; + } + HasError = true; + if (!ContinueOnSemicolons) + return !HasError; + nextToken(); + break; + case tok::comma: + nextToken(); + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); + break; + default: + nextToken(); + break; + } + } while (!eof()); + return false; +} + +void UnwrappedLineParser::parseParens() { + assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); + nextToken(); + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_paren: + parseParens(); + if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) + parseChildBlock(); + break; + case tok::r_paren: + nextToken(); + return; + case tok::r_brace: + // A "}" inside parenthesis is an error if there wasn't a matching "{". + return; + case tok::l_square: + tryToParseLambda(); + break; + case tok::l_brace: + if (!tryToParseBracedList()) + parseChildBlock(); + break; + case tok::at: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); + parseBracedList(); + } + break; + case tok::kw_class: + if (Style.Language == FormatStyle::LK_JavaScript) + parseRecord(/*ParseAsExpr=*/true); + else + nextToken(); + break; + case tok::identifier: + if (Style.Language == FormatStyle::LK_JavaScript && + (FormatTok->is(Keywords.kw_function) || + FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) + tryToParseJSFunction(); + else + nextToken(); + break; + default: + nextToken(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { + if (!LambdaIntroducer) { + assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); + if (tryToParseLambda()) + return; + } + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_paren: + parseParens(); + break; + case tok::r_square: + nextToken(); + return; + case tok::r_brace: + // A "}" inside parenthesis is an error if there wasn't a matching "{". 
+ return; + case tok::l_square: + parseSquare(); + break; + case tok::l_brace: { + if (!tryToParseBracedList()) + parseChildBlock(); + break; + } + case tok::at: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); + parseBracedList(); + } + break; + default: + nextToken(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseIfThenElse() { + assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); + nextToken(); + if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) + nextToken(); + if (FormatTok->Tok.is(tok::l_paren)) + parseParens(); + bool NeedsUnwrappedLine = false; + if (FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BraceWrapping.BeforeElse) + addUnwrappedLine(); + else + NeedsUnwrappedLine = true; + } else { + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } + if (FormatTok->Tok.is(tok::kw_else)) { + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + } else if (FormatTok->Tok.is(tok::kw_if)) { + parseIfThenElse(); + } else { + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + if (FormatTok->is(tok::eof)) + addUnwrappedLine(); + --Line->Level; + } + } else if (NeedsUnwrappedLine) { + addUnwrappedLine(); + } +} + +void UnwrappedLineParser::parseTryCatch() { + assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); + nextToken(); + bool NeedsUnwrappedLine = false; + if (FormatTok->is(tok::colon)) { + // We are in a function try block, what comes is an initializer list. + nextToken(); + + // In case identifiers were removed by clang-tidy, what might follow is + // multiple commas in sequence - before the first identifier. 
+ while (FormatTok->is(tok::comma)) + nextToken(); + + while (FormatTok->is(tok::identifier)) { + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + + // In case identifiers were removed by clang-tidy, what might follow is + // multiple commas in sequence - after the first identifier. + while (FormatTok->is(tok::comma)) + nextToken(); + } + } + // Parse try with resource. + if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { + parseParens(); + } + if (FormatTok->is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BraceWrapping.BeforeCatch) { + addUnwrappedLine(); + } else { + NeedsUnwrappedLine = true; + } + } else if (!FormatTok->is(tok::kw_catch)) { + // The C++ standard requires a compound-statement after a try. + // If there's none, we try to assume there's a structuralElement + // and try to continue. + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } + while (1) { + if (FormatTok->is(tok::at)) + nextToken(); + if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, + tok::kw___finally) || + ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + FormatTok->is(Keywords.kw_finally)) || + (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) + break; + nextToken(); + while (FormatTok->isNot(tok::l_brace)) { + if (FormatTok->is(tok::l_paren)) { + parseParens(); + continue; + } + if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) + return; + nextToken(); + } + NeedsUnwrappedLine = false; + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BraceWrapping.BeforeCatch) + addUnwrappedLine(); + else + NeedsUnwrappedLine = true; + } + if (NeedsUnwrappedLine) + addUnwrappedLine(); +} + +void UnwrappedLineParser::parseNamespace() { + 
assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && + "'namespace' expected"); + + const FormatToken &InitialToken = *FormatTok; + nextToken(); + if (InitialToken.is(TT_NamespaceMacro)) { + parseParens(); + } else { + while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, + tok::l_square)) { + if (FormatTok->is(tok::l_square)) + parseSquare(); + else + nextToken(); + } + } + if (FormatTok->Tok.is(tok::l_brace)) { + if (ShouldBreakBeforeBrace(Style, InitialToken)) + addUnwrappedLine(); + + bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || + (Style.NamespaceIndentation == FormatStyle::NI_Inner && + DeclarationScopeStack.size() > 1); + parseBlock(/*MustBeDeclaration=*/true, AddLevel); + // Munch the semicolon after a namespace. This is more common than one would + // think. Putting the semicolon into its own line is very ugly. + if (FormatTok->Tok.is(tok::semi)) + nextToken(); + addUnwrappedLine(); + } + // FIXME: Add error handling. +} + +void UnwrappedLineParser::parseNew() { + assert(FormatTok->is(tok::kw_new) && "'new' expected"); + nextToken(); + + if (Style.isCSharp()) { + do { + if (FormatTok->is(tok::l_brace)) + parseBracedList(); + + if (FormatTok->isOneOf(tok::semi, tok::comma)) + return; + + nextToken(); + } while (!eof()); + } + + if (Style.Language != FormatStyle::LK_Java) + return; + + // In Java, we can parse everything up to the parens, which aren't optional. + do { + // There should not be a ;, { or } before the new's open paren. + if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) + return; + + // Consume the parens. + if (FormatTok->is(tok::l_paren)) { + parseParens(); + + // If there is a class body of an anonymous class, consume that as child. 
+ if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + return; + } + nextToken(); + } while (!eof()); +} + +void UnwrappedLineParser::parseForOrWhileLoop() { + assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && + "'for', 'while' or foreach macro expected"); + nextToken(); + // JS' for await ( ... + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_await)) + nextToken(); + if (FormatTok->Tok.is(tok::l_paren)) + parseParens(); + if (FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + } else { + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } +} + +void UnwrappedLineParser::parseDoWhile() { + assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BraceWrapping.IndentBraces) + addUnwrappedLine(); + } else { + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } + + // FIXME: Add error handling. 
+ if (!FormatTok->Tok.is(tok::kw_while)) { + addUnwrappedLine(); + return; + } + + nextToken(); + parseStructuralElement(); +} + +void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { + nextToken(); + unsigned OldLineLevel = Line->Level; + if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) + --Line->Level; + if (LeftAlignLabel) + Line->Level = 0; + if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && + FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Line->Level, + Style.BraceWrapping.AfterCaseLabel, + Style.BraceWrapping.IndentBraces); + parseBlock(/*MustBeDeclaration=*/false); + if (FormatTok->Tok.is(tok::kw_break)) { + if (Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_Always) + addUnwrappedLine(); + parseStructuralElement(); + } + addUnwrappedLine(); + } else { + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + } + Line->Level = OldLineLevel; + if (FormatTok->isNot(tok::l_brace)) { + parseStructuralElement(); + addUnwrappedLine(); + } +} + +void UnwrappedLineParser::parseCaseLabel() { + assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); + // FIXME: fix handling of complex expressions here. + do { + nextToken(); + } while (!eof() && !FormatTok->Tok.is(tok::colon)); + parseLabel(); +} + +void UnwrappedLineParser::parseSwitch() { + assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); + nextToken(); + if (FormatTok->Tok.is(tok::l_paren)) + parseParens(); + if (FormatTok->Tok.is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + } else { + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } +} + +void UnwrappedLineParser::parseAccessSpecifier() { + nextToken(); + // Understand Qt's slots. 
+ if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) + nextToken(); + // Otherwise, we don't know what it is, and we'd better keep the next token. + if (FormatTok->Tok.is(tok::colon)) + nextToken(); + addUnwrappedLine(); +} + +bool UnwrappedLineParser::parseEnum() { + // Won't be 'enum' for NS_ENUMs. + if (FormatTok->Tok.is(tok::kw_enum)) + nextToken(); + + // In TypeScript, "enum" can also be used as property name, e.g. in interface + // declarations. An "enum" keyword followed by a colon would be a syntax + // error and thus assume it is just an identifier. + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->isOneOf(tok::colon, tok::question)) + return false; + + // In protobuf, "enum" can be used as a field name. + if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) + return false; + + // Eat up enum class ... + if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) + nextToken(); + + while (FormatTok->Tok.getIdentifierInfo() || + FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, + tok::greater, tok::comma, tok::question)) { + nextToken(); + // We can have macros or attributes in between 'enum' and the enum name. + if (FormatTok->is(tok::l_paren)) + parseParens(); + if (FormatTok->is(tok::identifier)) { + nextToken(); + // If there are two identifiers in a row, this is likely an elaborate + // return type. In Java, this can be "implements", etc. + if (Style.isCpp() && FormatTok->is(tok::identifier)) + return false; + } + } + + // Just a declaration or something is wrong. + if (FormatTok->isNot(tok::l_brace)) + return true; + FormatTok->BlockKind = BK_Block; + + if (Style.Language == FormatStyle::LK_Java) { + // Java enums are different. + parseJavaEnumBody(); + return true; + } + if (Style.Language == FormatStyle::LK_Proto) { + parseBlock(/*MustBeDeclaration=*/true); + return true; + } + + if (!Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); + // Parse enum body. 
+ nextToken(); + if (!Style.AllowShortEnumsOnASingleLine) { + addUnwrappedLine(); + Line->Level += 1; + } + bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, + /*IsEnum=*/true); + if (!Style.AllowShortEnumsOnASingleLine) + Line->Level -= 1; + if (HasError) { + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + } + return true; + + // There is no addUnwrappedLine() here so that we fall through to parsing a + // structural element afterwards. Thus, in "enum A {} n, m;", + // "} n, m;" will end up in one unwrapped line. +} + +void UnwrappedLineParser::parseJavaEnumBody() { + // Determine whether the enum is simple, i.e. does not have a semicolon or + // constants with class bodies. Simple enums can be formatted like braced + // lists, contracted to a single line, etc. + unsigned StoredPosition = Tokens->getPosition(); + bool IsSimple = true; + FormatToken *Tok = Tokens->getNextToken(); + while (Tok) { + if (Tok->is(tok::r_brace)) + break; + if (Tok->isOneOf(tok::l_brace, tok::semi)) { + IsSimple = false; + break; + } + // FIXME: This will also mark enums with braces in the arguments to enum + // constants as "not simple". This is probably fine in practice, though. + Tok = Tokens->getNextToken(); + } + FormatTok = Tokens->setPosition(StoredPosition); + + if (IsSimple) { + nextToken(); + parseBracedList(); + addUnwrappedLine(); + return; + } + + // Parse the body of a more complex enum. + // First add a line for everything up to the "{". + nextToken(); + addUnwrappedLine(); + ++Line->Level; + + // Parse the enum constants. + while (FormatTok) { + if (FormatTok->is(tok::l_brace)) { + // Parse the constant's class body. 
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } else if (FormatTok->is(tok::l_paren)) { + parseParens(); + } else if (FormatTok->is(tok::comma)) { + nextToken(); + addUnwrappedLine(); + } else if (FormatTok->is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + break; + } else if (FormatTok->is(tok::r_brace)) { + addUnwrappedLine(); + break; + } else { + nextToken(); + } + } + + // Parse the class body after the enum's ";" if any. + parseLevel(/*HasOpeningBrace=*/true); + nextToken(); + --Line->Level; + addUnwrappedLine(); +} + +void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { + const FormatToken &InitialToken = *FormatTok; + nextToken(); + + // The actual identifier can be a nested name specifier, and in macros + // it is often token-pasted. + while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, + tok::kw___attribute, tok::kw___declspec, + tok::kw_alignas, tok::l_square) || + ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + FormatTok->isOneOf(tok::period, tok::comma))) { + + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { + // JavaScript/TypeScript supports inline object types in + // extends/implements positions: + // class Foo implements {bar: number} { } + nextToken(); + if (FormatTok->is(tok::l_brace)) { + tryToParseBracedList(); + continue; + } + } + bool IsNonMacroIdentifier = + FormatTok->is(tok::identifier) && + FormatTok->TokenText != FormatTok->TokenText.upper(); + nextToken(); + // We can have macros or attributes in between 'class' and the class name. 
+ if (!IsNonMacroIdentifier) { + if (FormatTok->Tok.is(tok::l_paren)) { + parseParens(); + } else if (FormatTok->Tok.is(tok::l_square)) { + parseSquare(); + nextToken(); + } + } + } + + // Note that parsing away template declarations here leads to incorrectly + // accepting function declarations as record declarations. + // In general, we cannot solve this problem. Consider: + // class A B() {} + // which can be a function definition or a class definition when B() is a + // macro. If we find enough real-world cases where this is a problem, we + // can parse for the 'template' keyword in the beginning of the statement, + // and thus rule out the record production in case there is no template + // (this would still leave us with an ambiguity between template function + // and class declarations). + if (FormatTok->isOneOf(tok::colon, tok::less)) { + while (!eof()) { + if (FormatTok->is(tok::l_brace)) { + calculateBraceTypes(/*ExpectClassBody=*/true); + if (!tryToParseBracedList()) + break; + } + if (FormatTok->Tok.is(tok::semi)) + return; + if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { + addUnwrappedLine(); + nextToken(); + parseCSharpGenericTypeConstraint(); + break; + } + nextToken(); + } + } + if (FormatTok->Tok.is(tok::l_brace)) { + if (ParseAsExpr) { + parseChildBlock(); + } else { + if (ShouldBreakBeforeBrace(Style, InitialToken)) + addUnwrappedLine(); + + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } + } + // There is no addUnwrappedLine() here so that we fall through to parsing a + // structural element afterwards. Thus, in "class A {} n, m;", + // "} n, m;" will end up in one unwrapped line. 
+} + +void UnwrappedLineParser::parseObjCMethod() { + assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && + "'(' or identifier expected."); + do { + if (FormatTok->Tok.is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + return; + } else if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterFunction) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + return; + } else { + nextToken(); + } + } while (!eof()); +} + +void UnwrappedLineParser::parseObjCProtocolList() { + assert(FormatTok->Tok.is(tok::less) && "'<' expected."); + do { + nextToken(); + // Early exit in case someone forgot a close angle. + if (FormatTok->isOneOf(tok::semi, tok::l_brace) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) + return; + } while (!eof() && FormatTok->Tok.isNot(tok::greater)); + nextToken(); // Skip '>'. +} + +void UnwrappedLineParser::parseObjCUntilAtEnd() { + do { + if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { + nextToken(); + addUnwrappedLine(); + break; + } + if (FormatTok->is(tok::l_brace)) { + parseBlock(/*MustBeDeclaration=*/false); + // In ObjC interfaces, nothing should be following the "}". + addUnwrappedLine(); + } else if (FormatTok->is(tok::r_brace)) { + // Ignore stray "}". parseStructuralElement doesn't consume them. + nextToken(); + addUnwrappedLine(); + } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { + nextToken(); + parseObjCMethod(); + } else { + parseStructuralElement(); + } + } while (!eof()); +} + +void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { + assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || + FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); + nextToken(); + nextToken(); // interface name + + // @interface can be followed by a lightweight generic + // specialization list, then either a base class or a category. 
+ if (FormatTok->Tok.is(tok::less)) { + // Unlike protocol lists, generic parameterizations support + // nested angles: + // + // @interface Foo> : + // NSObject + // + // so we need to count how many open angles we have left. + unsigned NumOpenAngles = 1; + do { + nextToken(); + // Early exit in case someone forgot a close angle. + if (FormatTok->isOneOf(tok::semi, tok::l_brace) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) + break; + if (FormatTok->Tok.is(tok::less)) + ++NumOpenAngles; + else if (FormatTok->Tok.is(tok::greater)) { + assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); + --NumOpenAngles; + } + } while (!eof() && NumOpenAngles != 0); + nextToken(); // Skip '>'. + } + if (FormatTok->Tok.is(tok::colon)) { + nextToken(); + nextToken(); // base class name + } else if (FormatTok->Tok.is(tok::l_paren)) + // Skip category, if present. + parseParens(); + + if (FormatTok->Tok.is(tok::less)) + parseObjCProtocolList(); + + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterObjCDeclaration) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/true); + } + + // With instance variables, this puts '}' on its own line. Without instance + // variables, this ends the @interface line. + addUnwrappedLine(); + + parseObjCUntilAtEnd(); +} + +// Returns true for the declaration/definition form of @protocol, +// false for the expression form. +bool UnwrappedLineParser::parseObjCProtocol() { + assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); + nextToken(); + + if (FormatTok->is(tok::l_paren)) + // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". + return false; + + // The definition/declaration form, + // @protocol Foo + // - (int)someMethod; + // @end + + nextToken(); // protocol name + + if (FormatTok->Tok.is(tok::less)) + parseObjCProtocolList(); + + // Check for protocol declaration. 
+ if (FormatTok->Tok.is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + return true; + } + + addUnwrappedLine(); + parseObjCUntilAtEnd(); + return true; +} + +void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { + bool IsImport = FormatTok->is(Keywords.kw_import); + assert(IsImport || FormatTok->is(tok::kw_export)); + nextToken(); + + // Consume the "default" in "export default class/function". + if (FormatTok->is(tok::kw_default)) + nextToken(); + + // Consume "async function", "function" and "default function", so that these + // get parsed as free-standing JS functions, i.e. do not require a trailing + // semicolon. + if (FormatTok->is(Keywords.kw_async)) + nextToken(); + if (FormatTok->is(Keywords.kw_function)) { + nextToken(); + return; + } + + // For imports, `export *`, `export {...}`, consume the rest of the line up + // to the terminating `;`. For everything else, just return and continue + // parsing the structural element, i.e. the declaration or expression for + // `export default`. + if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && + !FormatTok->isStringLiteral()) + return; + + while (!eof()) { + if (FormatTok->is(tok::semi)) + return; + if (Line->Tokens.empty()) { + // Common issue: Automatic Semicolon Insertion wrapped the line, so the + // import statement should terminate. + return; + } + if (FormatTok->is(tok::l_brace)) { + FormatTok->BlockKind = BK_Block; + nextToken(); + parseBracedList(); + } else { + nextToken(); + } + } +} + +void UnwrappedLineParser::parseStatementMacro() { + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); +} + +LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, + StringRef Prefix = "") { + llvm::dbgs() << Prefix << "Line(" << Line.Level + << ", FSC=" << Line.FirstStartColumn << ")" + << (Line.InPPDirective ? 
" MACRO" : "") << ": "; + for (std::list::const_iterator I = Line.Tokens.begin(), + E = Line.Tokens.end(); + I != E; ++I) { + llvm::dbgs() << I->Tok->Tok.getName() << "[" + << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn + << "] "; + } + for (std::list::const_iterator I = Line.Tokens.begin(), + E = Line.Tokens.end(); + I != E; ++I) { + const UnwrappedLineNode &Node = *I; + for (SmallVectorImpl::const_iterator + I = Node.Children.begin(), + E = Node.Children.end(); + I != E; ++I) { + printDebugInfo(*I, "\nChild: "); + } + } + llvm::dbgs() << "\n"; +} + +void UnwrappedLineParser::addUnwrappedLine() { + if (Line->Tokens.empty()) + return; + LLVM_DEBUG({ + if (CurrentLines == &Lines) + printDebugInfo(*Line); + }); + CurrentLines->push_back(std::move(*Line)); + Line->Tokens.clear(); + Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; + Line->FirstStartColumn = 0; + if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { + CurrentLines->append( + std::make_move_iterator(PreprocessorDirectives.begin()), + std::make_move_iterator(PreprocessorDirectives.end())); + PreprocessorDirectives.clear(); + } + // Disconnect the current token from the last token on the previous line. + FormatTok->Previous = nullptr; +} + +bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } + +bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { + return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && + FormatTok.NewlinesBefore > 0; +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// section on \p Line. 
+static bool +continuesLineCommentSection(const FormatToken &FormatTok, + const UnwrappedLine &Line, + const llvm::Regex &CommentPragmasRegex) { + if (Line.Tokens.empty()) + return false; + + StringRef IndentContent = FormatTok.TokenText; + if (FormatTok.TokenText.startswith("//") || + FormatTok.TokenText.startswith("/*")) + IndentContent = FormatTok.TokenText.substr(2); + if (CommentPragmasRegex.match(IndentContent)) + return false; + + // If Line starts with a line comment, then FormatTok continues the comment + // section if its original column is greater or equal to the original start + // column of the line. + // + // Define the min column token of a line as follows: if a line ends in '{' or + // contains a '{' followed by a line comment, then the min column token is + // that '{'. Otherwise, the min column token of the line is the first token of + // the line. + // + // If Line starts with a token other than a line comment, then FormatTok + // continues the comment section if its original column is greater than the + // original start column of the min column token of the line. + // + // For example, the second line comment continues the first in these cases: + // + // // first line + // // second line + // + // and: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // b + // }; + // + // The second line comment doesn't continue the first in these cases: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // }; + const FormatToken *MinColumnToken = Line.Tokens.front().Tok; + + // Scan for '{//'. 
If found, use the column of '{' as a min column for line + // comment section continuation. + const FormatToken *PreviousToken = nullptr; + for (const UnwrappedLineNode &Node : Line.Tokens) { + if (PreviousToken && PreviousToken->is(tok::l_brace) && + isLineComment(*Node.Tok)) { + MinColumnToken = PreviousToken; + break; + } + PreviousToken = Node.Tok; + + // Grab the last newline preceding a token in this unwrapped line. + if (Node.Tok->NewlinesBefore > 0) { + MinColumnToken = Node.Tok; + } + } + if (PreviousToken && PreviousToken->is(tok::l_brace)) { + MinColumnToken = PreviousToken; + } + + return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, + MinColumnToken); +} + +void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { + bool JustComments = Line->Tokens.empty(); + for (SmallVectorImpl::const_iterator + I = CommentsBeforeNextToken.begin(), + E = CommentsBeforeNextToken.end(); + I != E; ++I) { + // Line comments that belong to the same line comment section are put on the + // same line since later we might want to reflow content between them. + // Additional fine-grained breaking of line comment sections is controlled + // by the class BreakableLineCommentSection in case it is desirable to keep + // several line comment sections in the same unwrapped line. + // + // FIXME: Consider putting separate line comment sections as children to the + // unwrapped line instead. 
+ (*I)->ContinuesLineCommentSection = + continuesLineCommentSection(**I, *Line, CommentPragmasRegex); + if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) + addUnwrappedLine(); + pushToken(*I); + } + if (NewlineBeforeNext && JustComments) + addUnwrappedLine(); + CommentsBeforeNextToken.clear(); +} + +void UnwrappedLineParser::nextToken(int LevelDifference) { + if (eof()) + return; + flushComments(isOnNewLine(*FormatTok)); + pushToken(FormatTok); + FormatToken *Previous = FormatTok; + if (Style.Language != FormatStyle::LK_JavaScript) + readToken(LevelDifference); + else + readTokenWithJavaScriptASI(); + FormatTok->Previous = Previous; +} + +void UnwrappedLineParser::distributeComments( + const SmallVectorImpl &Comments, + const FormatToken *NextTok) { + // Whether or not a line comment token continues a line is controlled by + // the method continuesLineCommentSection, with the following caveat: + // + // Define a trail of Comments to be a nonempty proper postfix of Comments such + // that each comment line from the trail is aligned with the next token, if + // the next token exists. If a trail exists, the beginning of the maximal + // trail is marked as a start of a new comment section. + // + // For example in this code: + // + // int a; // line about a + // // line 1 about b + // // line 2 about b + // int b; + // + // the two lines about b form a maximal trail, so there are two sections, the + // first one consisting of the single comment "// line about a" and the + // second one consisting of the next two comments. + if (Comments.empty()) + return; + bool ShouldPushCommentsInCurrentLine = true; + bool HasTrailAlignedWithNextToken = false; + unsigned StartOfTrailAlignedWithNextToken = 0; + if (NextTok) { + // We are skipping the first element intentionally. 
+ for (unsigned i = Comments.size() - 1; i > 0; --i) { + if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { + HasTrailAlignedWithNextToken = true; + StartOfTrailAlignedWithNextToken = i; + } + } + } + for (unsigned i = 0, e = Comments.size(); i < e; ++i) { + FormatToken *FormatTok = Comments[i]; + if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { + FormatTok->ContinuesLineCommentSection = false; + } else { + FormatTok->ContinuesLineCommentSection = + continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); + } + if (!FormatTok->ContinuesLineCommentSection && + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { + ShouldPushCommentsInCurrentLine = false; + } + if (ShouldPushCommentsInCurrentLine) { + pushToken(FormatTok); + } else { + CommentsBeforeNextToken.push_back(FormatTok); + } + } +} + +void UnwrappedLineParser::readToken(int LevelDifference) { + SmallVector Comments; + do { + FormatTok = Tokens->getNextToken(); + assert(FormatTok); + while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && + (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { + distributeComments(Comments, FormatTok); + Comments.clear(); + // If there is an unfinished unwrapped line, we flush the preprocessor + // directives only after that unwrapped line was finished later. + bool SwitchToPreprocessorLines = !Line->Tokens.empty(); + ScopedLineState BlockState(*this, SwitchToPreprocessorLines); + assert((LevelDifference >= 0 || + static_cast(-LevelDifference) <= Line->Level) && + "LevelDifference makes Line->Level negative"); + Line->Level += LevelDifference; + // Comments stored before the preprocessor directive need to be output + // before the preprocessor directive, at the same level as the + // preprocessor directive, as we consider them to apply to the directive. 
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && + PPBranchLevel > 0) + Line->Level += PPBranchLevel; + flushComments(isOnNewLine(*FormatTok)); + parsePPDirective(); + } + while (FormatTok->Type == TT_ConflictStart || + FormatTok->Type == TT_ConflictEnd || + FormatTok->Type == TT_ConflictAlternative) { + if (FormatTok->Type == TT_ConflictStart) { + conditionalCompilationStart(/*Unreachable=*/false); + } else if (FormatTok->Type == TT_ConflictAlternative) { + conditionalCompilationAlternative(); + } else if (FormatTok->Type == TT_ConflictEnd) { + conditionalCompilationEnd(); + } + FormatTok = Tokens->getNextToken(); + FormatTok->MustBreakBefore = true; + } + + if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && + !Line->InPPDirective) { + continue; + } + + if (!FormatTok->Tok.is(tok::comment)) { + distributeComments(Comments, FormatTok); + Comments.clear(); + return; + } + + Comments.push_back(FormatTok); + } while (!eof()); + + distributeComments(Comments, nullptr); + Comments.clear(); +} +/* pushToken: wraps Tok in an UnwrappedLineNode and appends it to Line->Tokens. If MustBreakBeforeNextToken is set, the token just appended gets MustBreakBefore = true and the flag is reset. */ +void UnwrappedLineParser::pushToken(FormatToken *Tok) { + Line->Tokens.push_back(UnwrappedLineNode(Tok)); + if (MustBreakBeforeNextToken) {/* the flag is consumed here: it marks only this token and is then cleared */ + Line->Tokens.back().Tok->MustBreakBefore = true; + MustBreakBeforeNextToken = false; + } +} + +} // end namespace format +} // end namespace clang Index: clang/unittests/Format/FormatTest.cpp =================================================================== --- clang/unittests/Format/FormatTest.cpp +++ clang/unittests/Format/FormatTest.cpp @@ -15849,6 +15849,138 @@ verifyFormat("operator&&(int(&&)(), class Foo);", Style); }/* ConceptsAndRequires: passes expected-format strings for concept definitions and requires clauses to verifyFormat. Starts from getLLVMStyle() with AllowShortFunctionsOnASingleLine set to SFS_None, later turns on IndentRequires, and finally sets SpaceBeforeParens to SBPO_Always. NOTE(review): in this copy the expected strings hold bare template lines with no parameter list, and two of the EqualityComparable checks are textually identical; presumably angle-bracketed text was lost in extraction - confirm against the original revision. */ +TEST_F(FormatTest, ConceptsAndRequires) { + FormatStyle Style = getLLVMStyle(); + Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + + verifyFormat("template \n" + "concept Hashable = requires(T a) {\n" + " { std::hash{}(a) } -> std::convertible_to;\n" + "};", + Style); + verifyFormat("template \n" + "concept bool EqualityComparable = requires(T a, T b) {\n" + " { a ==
b } -> bool;\n" + "};", + Style); + verifyFormat("template \n" + "concept bool EqualityComparable = requires(T a, T b) {\n" + " { a == b } -> bool;\n" + " { a != b } -> bool;\n" + "};", + Style); + verifyFormat("template \n" + "concept bool EqualityComparable = requires(T a, T b) {\n" + " { a == b } -> bool;\n" + " { a != b } -> bool;\n" + "};", + Style); + + verifyFormat("template \n" + "requires Iterator\n" + "void sort(It begin, It end) {\n" + " //....\n" + "}", + Style); + + verifyFormat("template \n" + "concept Large = sizeof(T) > 10;", + Style); + + verifyFormat("template \n" + "concept FooableWith = requires(T t, U u) {\n" + " typename T::foo_type;\n" + " { t.foo(u) } -> typename T::foo_type;\n" + " t++;\n" + "};\n" + "void doFoo(FooableWith auto t) {\n" + " t.foo(3);\n" + "}", + Style); + verifyFormat("template \n" + "concept Context = sizeof(T) == 1;", + Style); + verifyFormat("template \n" + "concept Context = is_specialization_of_v;", + Style); + verifyFormat("template \n" + "concept Node = std::is_object_v;", + Style); + verifyFormat("template \n" + "concept Tree = true;", + Style); + + verifyFormat("template int g(T i) requires Concept1 {\n" + " //...\n" + "}", + Style); + + verifyFormat( + "template int g(T i) requires Concept1 && Concept2 {\n" + " //...\n" + "}", + Style); + + verifyFormat( + "template int g(T i) requires Concept1 || Concept2 {\n" + " //...\n" + "}", + Style); + + verifyFormat("template \n" + "veryveryvery_long_return_type g(T i) requires Concept1 || " + "Concept2 {\n" + " //...\n" + "}", + Style); + + verifyFormat("template \n" + "veryveryvery_long_return_type g(T i) requires Concept1 && " + "Concept2 {\n" + " //...\n" + "}", + Style); + + verifyFormat( + "template \n" + "veryveryvery_long_return_type g(T i) requires Concept1 && Concept2 {\n" + " //...\n" + "}", + Style); + + verifyFormat( + "template \n" + "veryveryvery_long_return_type g(T i) requires Concept1 || Concept2 {\n" + " //...\n" + "}", + Style); + + Style.IndentRequires
= true;/* with IndentRequires on, the expected strings below carry leading whitespace before the requires clause that follows the template line */ + verifyFormat("template \n" + " requires Iterator\n" + "void sort(It begin, It end) {\n" + " //....\n" + "}", + Style); + verifyFormat("template \n" + " requires(index_ < sizeof...(Children_))\n" + "Tree auto &child() {\n" + " // ...\n" + "}", + Style); + + Style.SpaceBeforeParens = FormatStyle::SBPO_Always;/* with SBPO_Always the expected string below has a space between requires and its parenthesised parameter list */ + verifyFormat("template \n" + "concept Hashable = requires (T a) {\n" + " { std::hash{}(a) } -> std::convertible_to;\n" + "};", + Style); + + verifyFormat("template \n" + " requires EqualityComparable || Same\n" + "struct equal_to;", + Style); +} } // namespace } // namespace format } // namespace clang