Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -40,9 +40,16 @@
   }
 }
 
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///<", "//!<", "///", "//",
-                                              "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+                                            const FormatStyle &Style) {
+  static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+                                                    "//!"};
+  // Text protos accept both '//' and '#' as line comment prefixes.
+  static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+  ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    KnownPrefixes = KnownTextProtoPrefixes;
+
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -732,7 +739,8 @@
        CurrentTok = CurrentTok->Next) {
     LastLineTok = LineTok;
     StringRef TokenText(CurrentTok->TokenText);
-    assert(TokenText.startswith("//"));
+    assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+           "unsupported line comment prefix, '//' and '#' are supported");
     size_t FirstLineIndex = Lines.size();
     TokenText.split(Lines, "\n");
     Content.resize(Lines.size());
@@ -745,8 +753,9 @@
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
-          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
-      assert(IndentPrefix.startswith("//"));
+          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+      assert((IndentPrefix.startswith("//") || IndentPrefix.startswith("#")) &&
+             "unsupported line comment prefix, '//' and '#' are supported");
       OriginalPrefix[i] = Prefix[i] = IndentPrefix;
       if (Lines[i].size() > Prefix[i].size() &&
           isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -760,6 +769,9 @@
           Prefix[i] = "///< ";
         else if (Prefix[i] == "//!<")
           Prefix[i] = "//!< ";
+        else if (Prefix[i] == "#" &&
+                 Style.Language == FormatStyle::LK_TextProto)
+          Prefix[i] = "# ";
       }
 
       Tokens[i] = LineTok;
Index: lib/Format/FormatTokenLexer.h
===================================================================
--- lib/Format/FormatTokenLexer.h
+++ lib/Format/FormatTokenLexer.h
@@ -73,6 +73,10 @@
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  // If the last lexed token is a '#', turns it and the remainder of its line
+  // into a single line comment token. Used for text protos.
+  void tryParsePythonComment();
+
   bool tryMerge_TMacro();
 
   bool tryMergeConflictMarkers();
Index: lib/Format/FormatTokenLexer.cpp
===================================================================
--- lib/Format/FormatTokenLexer.cpp
+++ lib/Format/FormatTokenLexer.cpp
@@ -50,6 +50,8 @@
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     }
+    if (Style.Language == FormatStyle::LK_TextProto)
+      tryParsePythonComment();
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
@@ -330,6 +332,29 @@
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
+void FormatTokenLexer::tryParsePythonComment() {
+  FormatToken *HashToken = Tokens.back();
+  if (HashToken->isNot(tok::hash))
+    return;
+  // Turn the remainder of this line into a comment.
+  const char *CommentBegin =
+      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+  size_t From = CommentBegin - Lex->getBuffer().begin();
+  size_t To = Lex->getBuffer().find_first_of('\n', From);
+  // No newline after the '#': the comment runs to the end of the buffer.
+  if (To == StringRef::npos)
+    To = Lex->getBuffer().size();
+  size_t Len = To - From;
+  HashToken->Type = TT_LineComment;
+  HashToken->Tok.setKind(tok::comment);
+  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+  // Resume lexing right after the comment text, or at EOF if it ends there.
+  SourceLocation Loc = To < Lex->getBuffer().size()
+                           ? Lex->getSourceLocation(CommentBegin + Len)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
 bool FormatTokenLexer::tryMerge_TMacro() {
   if (Tokens.size() < 4)
     return false;
Index: lib/Format/UnwrappedLineParser.cpp
===================================================================
--- lib/Format/UnwrappedLineParser.cpp
+++ lib/Format/UnwrappedLineParser.cpp
@@ -56,7 +56,8 @@
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
+  // Any comment that is not a block comment counts, i.e. both '//' and '#'.
+  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
 }
 
 // Checks if \p FormatTok is a line comment that continues the line comment
Index: unittests/Format/FormatTestComments.cpp
===================================================================
--- unittests/Format/FormatTestComments.cpp
+++ unittests/Format/FormatTestComments.cpp
@@ -62,6 +62,13 @@
     return Style;
   }
 
+  // Google text proto style with the given column limit.
+  FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
   void verifyFormat(llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle()) {
     EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
@@ -2872,6 +2879,85 @@
                    " A = B;",
                    getLLVMStyleWithColumns(40)));
 }
+
+TEST_F(FormatTestComments, PythonStyleComments) {
+  // Keeps a space after '#'.
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("#comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks long comment.
+  EXPECT_EQ("# comment comment\n"
+            "# comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Indents comments.
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {\n"
+                   "# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  // Reflows long comments.
+  EXPECT_EQ("# comment comment\n"
+            "# comment comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks trailing comments.
+  EXPECT_EQ("k: val  # comment\n"
+            "        # comment\n"
+            "a: 1",
+            format("k:val#comment comment\n"
+                   "a:1",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("id {\n"
+            "  k: val  # comment\n"
+            "          # comment\n"
+            "  # line line\n"
+            "  a: 1\n"
+            "}",
+            format("id {k:val#comment comment\n"
+                   "# line line\n"
+                   "a:1}",
+                   getTextProtoStyleWithColumns(20)));
+  // Aligns trailing comments.
+  EXPECT_EQ("k: val  # commen1\n"
+            "        # commen2\n"
+            "        # commen3\n"
+            "# commen4\n"
+            "a: 1  # commen5\n"
+            "      # commen6\n"
+            "      # commen7",
+            format("k:val#commen1 commen2\n"
+                   " # commen3\n"
+                   "# commen4\n"
+                   "a:1#commen5 commen6\n"
+                   " #commen7",
+                   getTextProtoStyleWithColumns(20)));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang