Index: cfe/trunk/include/clang/Format/Format.h =================================================================== --- cfe/trunk/include/clang/Format/Format.h +++ cfe/trunk/include/clang/Format/Format.h @@ -1120,7 +1120,10 @@ /// (https://developers.google.com/protocol-buffers/). LK_Proto, /// Should be used for TableGen code. - LK_TableGen + LK_TableGen, + /// Should be used for Protocol Buffer messages in text format + /// (https://developers.google.com/protocol-buffers/). + LK_TextProto }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } Index: cfe/trunk/lib/Format/ContinuationIndenter.cpp =================================================================== --- cfe/trunk/lib/Format/ContinuationIndenter.cpp +++ cfe/trunk/lib/Format/ContinuationIndenter.cpp @@ -66,6 +66,16 @@ !Style.BreakBeforeInheritanceComma)); } +static bool opensProtoMessageField(const FormatToken &LessTok, + const FormatStyle &Style) { + if (LessTok.isNot(tok::less)) + return false; + return Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && + (LessTok.NestingLevel > 0 || + (LessTok.Previous && LessTok.Previous->is(tok::equal)))); +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -94,6 +104,13 @@ State.LowestLevelOnLine = 0; State.IgnoreStackForComparison = false; + if (Style.Language == FormatStyle::LK_TextProto) { + // We need this in order to deal with the bin packing of text fields at + // global scope. + State.Stack.back().AvoidBinPacking = true; + State.Stack.back().BreakBeforeParameter = true; + } + // The first token has already been indented and thus consumed. moveStateToNextToken(State, DryRun, /*Newline=*/false); return State; @@ -176,7 +193,8 @@ return true; if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || (Previous.is(TT_ArrayInitializerLSquare) && - Previous.ParameterCount > 1)) && + Previous.ParameterCount > 1) || + opensProtoMessageField(Previous, Style)) && Style.ColumnLimit > 0 && getLengthToMatchingParen(Previous) + State.Column - 1 > getColumnLimit(State)) @@ -501,13 +519,6 @@ } } -static bool lessOpensProtoMessageField(const FormatToken &LessTok, - const LineState &State) { - assert(LessTok.is(tok::less)); - return LessTok.NestingLevel > 0 || - (LessTok.Previous && LessTok.Previous->is(tok::equal)); -} - unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, bool DryRun) { FormatToken &Current = *State.NextToken; @@ -650,9 +661,7 @@ // before the corresponding } or ]. if (PreviousNonComment && (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_Proto && - PreviousNonComment->is(tok::less) && - lessOpensProtoMessageField(*PreviousNonComment, State)) || + opensProtoMessageField(*PreviousNonComment, Style) || (PreviousNonComment->is(TT_TemplateString) && PreviousNonComment->opensScope()))) State.Stack.back().BreakBeforeClosingBrace = true; @@ -695,7 +704,9 @@ return Current.NestingLevel == 0 ? State.FirstIndent : State.Stack.back().Indent; if ((Current.isOneOf(tok::r_brace, tok::r_square) || - (Current.is(tok::greater) && Style.Language == FormatStyle::LK_Proto)) && + (Current.is(tok::greater) && + (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto))) && State.Stack.size() > 1) { if (Current.closesBlockOrBlockTypeList(Style)) return State.Stack[State.Stack.size() - 2].NestedBlockIndent; @@ -1050,8 +1061,7 @@ unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall, State.Stack.back().NestedBlockIndent); if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_Proto && Current.is(tok::less) && - lessOpensProtoMessageField(Current, State))) { + opensProtoMessageField(Current, Style)) { if (Current.opensBlockOrBlockTypeList(Style)) { NewIndent = Style.IndentWidth + std::min(State.Column, State.Stack.back().NestedBlockIndent); @@ -1064,7 +1074,9 @@ Current.MatchingParen->Previous->is(tok::comma); AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto || + !Style.BinPackArguments || (NextNoComment && NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, TT_DesignatedInitializerLSquare)); Index: cfe/trunk/lib/Format/Format.cpp =================================================================== --- cfe/trunk/lib/Format/Format.cpp +++ cfe/trunk/lib/Format/Format.cpp @@ -56,6 +56,7 @@ IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); + IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); } }; @@ -631,6 +632,12 @@ } FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { + if (Language == FormatStyle::LK_TextProto) { + FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto); + GoogleStyle.Language = FormatStyle::LK_TextProto; + return GoogleStyle; + } + FormatStyle GoogleStyle = getLLVMStyle(); GoogleStyle.Language = Language; Index: cfe/trunk/lib/Format/FormatToken.h =================================================================== --- cfe/trunk/lib/Format/FormatToken.h +++ cfe/trunk/lib/Format/FormatToken.h @@ -466,7 +466,8 @@ (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || - (is(tok::less) && Style.Language == FormatStyle::LK_Proto); + (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto)); } /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. Index: cfe/trunk/lib/Format/TokenAnnotator.cpp =================================================================== --- cfe/trunk/lib/Format/TokenAnnotator.cpp +++ cfe/trunk/lib/Format/TokenAnnotator.cpp @@ -90,7 +90,8 @@ } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && - Style.Language != FormatStyle::LK_Proto)) + Style.Language != FormatStyle::LK_Proto && + Style.Language != FormatStyle::LK_TextProto)) return false; // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the @@ -453,7 +454,8 @@ FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || - Style.Language == FormatStyle::LK_Proto) && + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal))) Previous->Type = TT_SelectorName; @@ -536,8 +538,13 @@ } } if (Contexts.back().ColonIsDictLiteral || - Style.Language == FormatStyle::LK_Proto) { + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { Tok->Type = TT_DictLiteral; + if (Style.Language == FormatStyle::LK_TextProto) { + if (FormatToken *Previous = Tok->getPreviousNonComment()) + Previous->Type = TT_SelectorName; + } } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; @@ -635,12 +642,22 @@ return false; break; case tok::l_brace: + if (Style.Language == FormatStyle::LK_TextProto) { + FormatToken *Previous =Tok->getPreviousNonComment(); + if (Previous && Previous->Type != TT_DictLiteral) + Previous->Type = TT_SelectorName; + } if (!parseBrace()) return false; break; case tok::less: if (parseAngle()) { Tok->Type = TT_TemplateOpener; + if (Style.Language == FormatStyle::LK_TextProto) { + FormatToken *Previous = Tok->getPreviousNonComment(); + if (Previous && Previous->Type != TT_DictLiteral) + Previous->Type = TT_SelectorName; + } } else { Tok->Type = TT_BinaryOperator; NonTemplateLess.insert(Tok); @@ -1572,7 +1589,8 @@ return prec::Conditional; if (NextNonComment && Current->is(TT_SelectorName) && (NextNonComment->is(TT_DictLiteral) || - (Style.Language == FormatStyle::LK_Proto && + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && NextNonComment->is(tok::less)))) return prec::Assignment; if (Current->is(TT_JsComputedPropertyName)) @@ -2274,7 +2292,8 @@ if (Style.isCpp()) { if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); - } else if (Style.Language == FormatStyle::LK_Proto) { + } else if (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, Keywords.kw_repeated, Keywords.kw_extend)) Index: cfe/trunk/lib/Format/UnwrappedLineParser.cpp =================================================================== --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp @@ -286,7 +286,10 @@ !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - parseLevel(/*HasOpeningBrace=*/false); + if (Style.Language == FormatStyle::LK_TextProto) + parseBracedList(); + else + parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. flushComments(true); addUnwrappedLine(); @@ -832,6 +835,7 @@ case tok::at: nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); break; } @@ -996,8 +1000,10 @@ switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } break; case tok::kw_enum: // Ignore if this is part of "template Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); - else if (Style.Language == FormatStyle::LK_Proto && - FormatTok->Tok.is(tok::less)) + } else if (Style.Language == FormatStyle::LK_Proto && + FormatTok->Tok.is(tok::less)) { + nextToken(); parseBracedList(/*ContinueOnSemicolons=*/false, /*ClosingBraceKind=*/tok::greater); + } break; case tok::l_square: parseSquare(); @@ -1345,6 +1354,7 @@ assert(FormatTok->BlockKind != BK_Unknown); if (FormatTok->BlockKind == BK_Block) return false; + nextToken(); parseBracedList(); return true; } @@ -1352,7 +1362,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, tok::TokenKind ClosingBraceKind) { bool HasError = false; - nextToken(); // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. @@ -1407,6 +1416,7 @@ // Assume there are no blocks inside a braced init list apart // from the ones we explicitly parse out (like lambdas). FormatTok->BlockKind = BK_BracedInit; + nextToken(); parseBracedList(); break; case tok::semi: @@ -1459,8 +1469,10 @@ break; case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } break; case tok::kw_class: if (Style.Language == FormatStyle::LK_JavaScript) @@ -1508,8 +1520,10 @@ } case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } break; default: nextToken(); @@ -1836,6 +1850,7 @@ } // Parse enum body. + nextToken(); bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); if (HasError) { if (FormatTok->is(tok::semi)) @@ -1870,6 +1885,7 @@ FormatTok = Tokens->setPosition(StoredPosition); if (IsSimple) { + nextToken(); parseBracedList(); addUnwrappedLine(); return; @@ -2081,6 +2097,7 @@ } if (FormatTok->is(tok::l_brace)) { FormatTok->BlockKind = BK_Block; + nextToken(); parseBracedList(); } else { nextToken(); Index: cfe/trunk/unittests/Format/CMakeLists.txt =================================================================== --- cfe/trunk/unittests/Format/CMakeLists.txt +++ cfe/trunk/unittests/Format/CMakeLists.txt @@ -11,6 +11,7 @@ FormatTestObjC.cpp FormatTestProto.cpp FormatTestSelective.cpp + FormatTestTextProto.cpp NamespaceEndCommentsFixerTest.cpp SortImportsTestJS.cpp SortIncludesTest.cpp Index: cfe/trunk/unittests/Format/FormatTestTextProto.cpp =================================================================== --- cfe/trunk/unittests/Format/FormatTestTextProto.cpp +++ cfe/trunk/unittests/Format/FormatTestTextProto.cpp @@ -0,0 +1,251 @@ +//===- unittest/Format/FormatTestProto.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "FormatTestUtils.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Debug.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +namespace clang { +namespace format { + +class FormatTestTextProto : public ::testing::Test { +protected: + static std::string format(llvm::StringRef Code, unsigned Offset, + unsigned Length, const FormatStyle &Style) { + DEBUG(llvm::errs() << "---\n"); + DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(Offset, Length)); + tooling::Replacements Replaces = reformat(Style, Code, Ranges); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + static std::string format(llvm::StringRef Code) { + FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); + Style.ColumnLimit = 60; // To make writing tests easier. + return format(Code, 0, Code.size(), Style); + } + + static void verifyFormat(llvm::StringRef Code) { + EXPECT_EQ(Code.str(), format(test::messUp(Code))); + } +}; + +TEST_F(FormatTestTextProto, KeepsTopLevelEntriesFittingALine) { + verifyFormat("field_a: OK field_b: OK field_c: OK field_d: OK field_e: OK"); +} + +TEST_F(FormatTestTextProto, SupportsMessageFields) { + verifyFormat("msg_field: {}"); + + verifyFormat("msg_field: {field_a: A}"); + + verifyFormat("msg_field: {field_a: \"OK\" field_b: 123}"); + + verifyFormat("msg_field: {\n" + " field_a: 1\n" + " field_b: OK\n" + " field_c: \"OK\"\n" + " field_d: 123\n" + " field_e: 23\n" + "}"); + + verifyFormat("msg_field{}"); + + verifyFormat("msg_field{field_a: A}"); + + verifyFormat("msg_field{field_a: \"OK\" field_b: 123}"); + + verifyFormat("msg_field{\n" + " field_a: 1\n" + " field_b: OK\n" + " field_c: \"OK\"\n" + " field_d: 123\n" + " field_e: 23.0\n" + " field_f: false\n" + " field_g: 'lala'\n" + " field_h: 1234.567e-89\n" + "}"); + + verifyFormat("msg_field: {msg_field{field_a: 1}}"); + + verifyFormat("id: \"ala.bala\"\n" + "item{type: ITEM_A rank: 1 score: 90.0}\n" + "item{type: ITEM_B rank: 2 score: 70.5}\n" + "item{\n" + " type: ITEM_A\n" + " rank: 3\n" + " score: 20.0\n" + " description: \"the third item has a description\"\n" + "}"); +} + +TEST_F(FormatTestTextProto, AvoidsTopLevelBinPacking) { + verifyFormat("field_a: OK\n" + "field_b: OK\n" + "field_c: OK\n" + "field_d: OK\n" + "field_e: OK\n" + "field_f: OK"); + + verifyFormat("field_a: OK\n" + "field_b: \"OK\"\n" + "field_c: \"OK\"\n" + "msg_field: {field_d: 123}\n" + "field_e: OK\n" + "field_f: OK"); + + verifyFormat("field_a: OK\n" + "field_b: \"OK\"\n" + "field_c: \"OK\"\n" + "msg_field: {field_d: 123 field_e: OK}"); + + verifyFormat("a: {\n" + " field_a: OK\n" + " field_b{field_c: OK}\n" + " field_d: OKOKOK\n" + " field_e: OK\n" + "}"); + + verifyFormat("field_a: OK,\n" + "field_b{field_c: OK},\n" + "field_d: OKOKOK,\n" + "field_e: OK"); +} + +TEST_F(FormatTestTextProto, AddsNewlinesAfterTrailingComments) { + verifyFormat("field_a: OK // Comment\n" + "field_b: 1"); + + verifyFormat("field_a: OK\n" + "msg_field: {\n" + " field_b: OK // Comment\n" + "}"); + + verifyFormat("field_a: OK\n" + "msg_field{\n" + " field_b: OK // Comment\n" + "}"); +} + +TEST_F(FormatTestTextProto, SupportsAngleBracketMessageFields) { + // Single-line tests + verifyFormat("msg_field<>"); + verifyFormat("msg_field: <>"); + verifyFormat("msg_field"); + verifyFormat("msg_field: "); + verifyFormat("msg_field>"); + verifyFormat("msg_field>>"); + verifyFormat("msg_field: >>"); + verifyFormat("msg_field"); + verifyFormat("msg_field, field_c: OK>"); + verifyFormat("msg_field>"); + verifyFormat("msg_field: "); + verifyFormat("msg_field: , field_c: OK>"); + verifyFormat("msg_field: >"); + verifyFormat("field_a: \"OK\", msg_field: , field_c: {}"); + verifyFormat("field_a, msg_field: , field_c<>"); + verifyFormat("field_a msg_field: field_c<>"); + verifyFormat("field>, field<>> field: "); + + // Multiple lines tests + verifyFormat("msg_field<\n" + " field_a: OK\n" + " field_b: \"OK\"\n" + " field_c: 1\n" + " field_d: 12.5\n" + " field_e: OK\n" + ">"); + + verifyFormat("msg_field: <>\n" + "field_c: \"OK\",\n" + "msg_field: \n" + "field_e: OK\n" + "msg_field: "); + + verifyFormat("field_a: OK,\n" + "field_b,\n" + "field_d: <12.5>,\n" + "field_e: OK"); + + verifyFormat("field_a: OK\n" + "field_b\n" + "field_d: <12.5>\n" + "field_e: OKOKOK"); + + verifyFormat("msg_field<\n" + " field_a: OK,\n" + " field_b,\n" + " field_d: <12.5>,\n" + " field_e: OK\n" + ">"); + + verifyFormat("msg_field<\n" + " field_a: ,\n" + " field_b,\n" + " field_d: <12.5>,\n" + " field_e: OK,\n" + ">"); + + verifyFormat("msg_field: <\n" + " field_a: \"OK\"\n" + " msg_field: {field_b: OK}\n" + " field_g: OK\n" + " field_g: OK\n" + " field_g: OK\n" + ">"); + + verifyFormat("field_a{\n" + " field_d: ok\n" + " field_b: \n" + " field_d: ok\n" + " field_d: ok\n" + "}"); + + verifyFormat("field_a: {\n" + " field_d: ok\n" + " field_b: \n" + " field_d: ok\n" + " field_d: ok\n" + "}"); + + verifyFormat("field_a: >\n" + "field_b<\n" + " field_b1: <>\n" + " field_b2: ok,\n" + " field_b3: <\n" + " field_x{} // Comment\n" + " field_y: {field_z: 1}\n" + " field_w: ok\n" + " >\n" + " field{\n" + " field_x<> // Comment\n" + " field_y: \n" + " field_w: ok\n" + " msg_field: <\n" + " field: <>\n" + " field: \n" + " field: \n" + " field: \n" + " field: \n" + " field: ok\n" + " >\n" + " }\n" + ">\n" + "field: OK,\n" + "field_c>>"); +} + +} // end namespace tooling +} // end namespace clang