Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -258,7 +258,7 @@ PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false), Finalized(false), ClosesRequiresClause(false), BlockKind(BK_Unknown), Decision(FD_Unformatted), PackingKind(PPK_Inconclusive), - Type(TT_Unknown) {} + TypeIsFinalized(false), Type(TT_Unknown) {} /// The \c Token. Token Tok; @@ -367,13 +367,31 @@ } private: + unsigned TypeIsFinalized : 1; TokenType Type; public: /// Returns the token's type, e.g. whether "<" is a template opener or /// binary operator. TokenType getType() const { return Type; } - void setType(TokenType T) { Type = T; } + void setType(TokenType T) { + assert((!TypeIsFinalized || T == Type) && + "Please use overwriteFixedType to change a fixed type."); + Type = T; + } + /// Sets the type and also the finalized flag. This prevents the type from + /// being reset in TokenAnnotator::resetTokenMetadata(). If the type needs to + /// be set to another one, please use overwriteFixedType, or even better, + /// remove the need to reassign the type. + void setFinalizedType(TokenType T) { + Type = T; + TypeIsFinalized = true; + } + void overwriteFixedType(TokenType T) { + TypeIsFinalized = false; + setType(T); + } + bool isTypeFinalized() const { return TypeIsFinalized; } /// The number of newlines immediately before the \c Token. /// Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -686,12 +686,12 @@ // array of pointers, but if '[' starts a selector then '*' is a // binary operator. if (Parent && Parent->is(TT_PointerOrReference)) - Parent->setType(TT_BinaryOperator); + Parent->overwriteFixedType(TT_BinaryOperator); } // An arrow after an ObjC method expression is not a lambda arrow. 
if (CurrentToken->getType() == TT_ObjCMethodExpr && CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) - CurrentToken->Next->setType(TT_Unknown); + CurrentToken->Next->overwriteFixedType(TT_Unknown); Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // FirstObjCSelectorName is set when a colon is found. This does @@ -814,7 +814,7 @@ } if (CurrentToken->is(tok::comma)) { if (Style.isJavaScript()) - Left->setType(TT_DictLiteral); + Left->overwriteFixedType(TT_DictLiteral); ++CommaCount; } if (!consumeToken()) @@ -1419,7 +1419,8 @@ // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). - if (!CurrentToken->isOneOf( + if (!CurrentToken->isTypeFinalized() && + !CurrentToken->isOneOf( TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro, TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow, @@ -1430,8 +1431,7 @@ TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_BinaryOperator, TT_CompoundRequirementLBrace, - TT_BracedListLBrace)) + TT_CompoundRequirementLBrace, TT_BracedListLBrace)) CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; Index: clang/lib/Format/UnwrappedLineParser.cpp =================================================================== --- clang/lib/Format/UnwrappedLineParser.cpp +++ clang/lib/Format/UnwrappedLineParser.cpp @@ -500,7 +500,7 @@ break; case tok::l_brace: if (NextLBracesType != TT_Unknown) - FormatTok->setType(NextLBracesType); + FormatTok->setFinalizedType(NextLBracesType); else if (FormatTok->Previous && FormatTok->Previous->ClosesRequiresClause) { // We need the 'default' case here to correctly parse a function @@ -1240,7 +1240,7 @@ nextToken(); while (!eof()) { if 
(FormatTok->is(tok::colon)) { - FormatTok->setType(TT_ModulePartitionColon); + FormatTok->setFinalizedType(TT_ModulePartitionColon); } // Handle import as we would an include statement. else if (FormatTok->is(tok::less)) { @@ -1250,7 +1250,7 @@ // literals. if (FormatTok->isNot(tok::comment) && !FormatTok->TokenText.startswith("//")) - FormatTok->setType(TT_ImplicitStringLiteral); + FormatTok->setFinalizedType(TT_ImplicitStringLiteral); nextToken(); } } @@ -1325,11 +1325,11 @@ case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(TT_InlineASMBrace); + FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); while (FormatTok && FormatTok->isNot(tok::eof)) { if (FormatTok->is(tok::r_brace)) { - FormatTok->setType(TT_InlineASMBrace); + FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); break; @@ -1651,7 +1651,7 @@ break; case tok::l_brace: if (NextLBracesType != TT_Unknown) - FormatTok->setType(NextLBracesType); + FormatTok->setFinalizedType(NextLBracesType); if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { // A block outside of parentheses must be the last part of a // structural element. @@ -1668,7 +1668,7 @@ addUnwrappedLine(); } if (!Line->InPPDirective) - FormatTok->setType(TT_FunctionLBrace); + FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(); addUnwrappedLine(); return; @@ -1773,7 +1773,7 @@ if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { - PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro); + PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); addUnwrappedLine(); return; } @@ -1997,7 +1997,7 @@ // This might or might not actually be a lambda arrow (this could be an // ObjC method invocation followed by a dereferencing arrow). We might // reset this back to TT_Unknown in TokenAnnotator. 
- FormatTok->setType(TT_LambdaArrow); + FormatTok->setFinalizedType(TT_LambdaArrow); SeenArrow = true; nextToken(); break; @@ -2005,8 +2005,8 @@ return true; } } - FormatTok->setType(TT_LambdaLBrace); - LSquare.setType(TT_LambdaLSquare); + FormatTok->setFinalizedType(TT_LambdaLBrace); + LSquare.setFinalizedType(TT_LambdaLSquare); parseChildBlock(); return true; } @@ -2038,7 +2038,7 @@ // Consume * (generator function). Treat it like C++'s overloaded operators. if (FormatTok->is(tok::star)) { - FormatTok->setType(TT_OverloadedOperator); + FormatTok->setFinalizedType(TT_OverloadedOperator); nextToken(); } @@ -2246,7 +2246,7 @@ } case tok::ampamp: if (AmpAmpTokenType != TT_Unknown) - FormatTok->setType(AmpAmpTokenType); + FormatTok->setFinalizedType(AmpAmpTokenType); LLVM_FALLTHROUGH; default: nextToken(); @@ -3003,9 +3003,9 @@ !RequiresToken->Previous || RequiresToken->Previous->is(TT_RequiresExpressionLBrace); - RequiresToken->setType(InRequiresExpression - ? TT_RequiresClauseInARequiresExpression - : TT_RequiresClause); + RequiresToken->setFinalizedType(InRequiresExpression + ? 
TT_RequiresClauseInARequiresExpression + : TT_RequiresClause); parseConstraintExpression(); @@ -3025,15 +3025,15 @@ assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); assert(RequiresToken->getType() == TT_Unknown); - RequiresToken->setType(TT_RequiresExpression); + RequiresToken->setFinalizedType(TT_RequiresExpression); if (FormatTok->is(tok::l_paren)) { - FormatTok->setType(TT_RequiresExpressionLParen); + FormatTok->setFinalizedType(TT_RequiresExpressionLParen); parseParens(); } if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(TT_RequiresExpressionLBrace); + FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); parseChildBlock(/*CanContainBracedList=*/false, /*NextLBracesType=*/TT_CompoundRequirementLBrace); } @@ -3109,7 +3109,7 @@ case tok::ampamp: case tok::pipepipe: - FormatTok->setType(TT_BinaryOperator); + FormatTok->setFinalizedType(TT_BinaryOperator); nextToken(); LambdaNextTimeAllowed = true; break; @@ -3217,7 +3217,7 @@ // Just a declaration or something is wrong. 
if (FormatTok->isNot(tok::l_brace)) return true; - FormatTok->setType(TT_EnumLBrace); + FormatTok->setFinalizedType(TT_EnumLBrace); FormatTok->setBlockKind(BK_Block); if (Style.Language == FormatStyle::LK_Java) { @@ -3469,7 +3469,7 @@ } }; if (FormatTok->is(tok::l_brace)) { - FormatTok->setType(GetBraceType(InitialToken)); + FormatTok->setFinalizedType(GetBraceType(InitialToken)); if (ParseAsExpr) { parseChildBlock(); } else { Index: clang/unittests/Format/TokenAnnotatorTest.cpp =================================================================== --- clang/unittests/Format/TokenAnnotatorTest.cpp +++ clang/unittests/Format/TokenAnnotatorTest.cpp @@ -70,6 +70,14 @@ EXPECT_EQ(Tokens.size(), 17u) << Tokens; EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference); + + Tokens = annotate("void f() {\n" + " while (p < a && *p == 'a')\n" + " p++;\n" + "}"); + EXPECT_EQ(Tokens.size(), 21u) << Tokens; + EXPECT_TOKEN(Tokens[10], tok::ampamp, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[11], tok::star, TT_UnaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsClasses) {