diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -256,16 +256,6 @@ } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; - } else if (PrevNonComment && PrevNonComment->is(tok::kw___attribute)) { - Left->setType(TT_AttributeParen); - } else if (PrevNonComment && - PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, - tok::kw_typeof, tok::kw__Atomic, - tok::kw___underlying_type)) { - Left->setType(TT_TypeDeclarationParen); - // decltype() and typeof() usually contain expressions. - if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) - Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. Contexts.back().IsForEachMacro = true; @@ -279,6 +269,21 @@ Contexts.back().IsExpression = !IsForOrCatch; } + // Infer the role of the l_paren based on the previous token if we haven't + // detected one one yet. + if (PrevNonComment && Left->is(TT_Unknown)) { + if (PrevNonComment->is(tok::kw___attribute)) { + Left->setType(TT_AttributeParen); + } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, + tok::kw_typeof, tok::kw__Atomic, + tok::kw___underlying_type)) { + Left->setType(TT_TypeDeclarationParen); + // decltype() and typeof() usually contain expressions. + if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) + Contexts.back().IsExpression = true; + } + } + if (StartsObjCMethodExpr) { Contexts.back().ColonIsObjCMethodExpr = true; Left->setType(TT_ObjCMethodExpr); diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -21,6 +21,7 @@ SortImportsTestJava.cpp SortIncludesTest.cpp UsingDeclarationsSorterTest.cpp + TokenAnnotatorTest.cpp ) clang_target_link_libraries(FormatTests diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8197,6 +8197,12 @@ verifyFormat("void f() { MACRO(A * B); }"); verifyFormat("void f() { MACRO(A & B); }"); + // This lambda was mis-formatted after D88956 (treating it as a binop): + verifyFormat("auto x = [](const decltype(x) &ptr) {};"); + verifyFormat("auto x = [](const decltype(x) *ptr) {};"); + verifyFormat("#define lambda [](const decltype(x) &ptr) {}"); + verifyFormat("#define lambda [](const decltype(x) *ptr) {}"); + verifyFormat("DatumHandle const *operator->() const { return input_; }"); verifyFormat("return options != nullptr && operator==(*options);"); diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h --- a/clang/unittests/Format/TestLexer.h +++ b/clang/unittests/Format/TestLexer.h @@ -16,6 +16,9 @@ #define CLANG_UNITTESTS_FORMAT_TESTLEXER_H #include "../../lib/Format/FormatTokenLexer.h" +#include "../../lib/Format/TokenAnalyzer.h" +#include "../../lib/Format/TokenAnnotator.h" +#include "../../lib/Format/UnwrappedLineParser.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" @@ -29,7 +32,8 @@ typedef llvm::SmallVector TokenList; inline std::ostream &operator<<(std::ostream &Stream, const FormatToken &Tok) { - Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\")"; + Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\" , " + << getTokenTypeName(Tok.getType()) << ")"; return Stream; } inline std::ostream &operator<<(std::ostream &Stream, const TokenList &Tokens) { @@ -37,7 +41,7 @@ for (size_t I = 0, E = Tokens.size(); I != E; ++I) { Stream << (I > 0 ? ", " : "") << *Tokens[I]; } - Stream << "}"; + Stream << "} (" << Tokens.size() << " tokens)"; return Stream; } @@ -53,35 +57,62 @@ }); } -class TestLexer { +class TestLexer : public UnwrappedLineConsumer { public: TestLexer(FormatStyle Style = getLLVMStyle()) : Style(Style), SourceMgr("test.cpp", ""), IdentTable(getFormattingLangOpts(Style)) {} TokenList lex(llvm::StringRef Code) { - Buffers.push_back( - llvm::MemoryBuffer::getMemBufferCopy(Code, "")); - clang::FileID FID = - SourceMgr.get().createFileID(Buffers.back()->getMemBufferRef()); - FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator, - IdentTable); - auto Result = Lex.lex(); + auto Result = getNewLexer(Code).lex(); return TokenList(Result.begin(), Result.end()); } + TokenList annotate(llvm::StringRef Code) { + FormatTokenLexer Lex = getNewLexer(Code); + auto Tokens = Lex.lex(); + UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); + Parser.parse(); + TokenAnnotator Annotator(Style, Lex.getKeywords()); + for (auto &Line : UnwrappedLines) { + AnnotatedLine Annotated(Line); + Annotator.annotate(Annotated); + } + UnwrappedLines.clear(); + return TokenList(Tokens.begin(), Tokens.end()); + } + FormatToken *id(llvm::StringRef Code) { auto Result = uneof(lex(Code)); assert(Result.size() == 1U && "Code must expand to 1 token."); return Result[0]; } +protected: + void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { + UnwrappedLines.push_back(TheLine); + } + void finishRun() override {} + + FormatTokenLexer getNewLexer(StringRef Code) { + Buffers.push_back( + llvm::MemoryBuffer::getMemBufferCopy(Code, "")); + clang::FileID FID = + SourceMgr.get().createFileID(Buffers.back()->getMemBufferRef()); + FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator, + IdentTable); + return FormatTokenLexer(SourceMgr.get(), FID, 0, Style, Encoding, Allocator, + IdentTable); + } + +public: FormatStyle Style; encoding::Encoding Encoding = encoding::Encoding_UTF8; std::vector> Buffers; clang::SourceManagerForFile SourceMgr; llvm::SpecificBumpPtrAllocator Allocator; IdentifierTable IdentTable; + SmallVector UnwrappedLines; }; } // namespace format diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -0,0 +1,70 @@ +//===- unittest/Format/TokenAnnotatorTest.cpp - Formatting unit tests -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Format/Format.h" + +#include "FormatTestUtils.h" +#include "TestLexer.h" +#include "gtest/gtest.h" + +namespace clang { +namespace format { +namespace { + +class TokenAnnotatorTest : public ::testing::Test { +protected: + TokenList annotate(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + return TestLexer(Style).annotate(Code); + } +}; + +#define EXPECT_TOKEN_KIND(FormatTok, Kind) \ + EXPECT_EQ((FormatTok)->Tok.getKind(), Kind) << *(FormatTok) +#define EXPECT_TOKEN_TYPE(FormatTok, Type) \ + EXPECT_EQ((FormatTok)->getType(), Type) << *(FormatTok) +#define EXPECT_TOKEN(FormatTok, Kind, Type) \ + do { \ + EXPECT_TOKEN_KIND(FormatTok, Kind); \ + EXPECT_TOKEN_TYPE(FormatTok, Type); \ + } while (false); + +TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmpInMacroDefinition) { + // This is a regression test for mis-parsing the & after decltype as a binary + // operator instead of a reference (when inside a macro definition). + auto Tokens = annotate("auto x = [](const decltype(x) &ptr) {};"); + EXPECT_EQ(Tokens.size(), 18u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::kw_decltype, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[9], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[11], tok::amp, TT_PointerOrReference); + // Same again with * instead of &: + Tokens = annotate("auto x = [](const decltype(x) *ptr) {};"); + EXPECT_EQ(Tokens.size(), 18u) << Tokens; + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference); + + // Also check that we parse correctly within a macro definition: + Tokens = annotate("#define lambda [](const decltype(x) &ptr) {}"); + EXPECT_EQ(Tokens.size(), 17u) << Tokens; + EXPECT_TOKEN(Tokens[7], tok::kw_decltype, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[9], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[11], tok::amp, TT_PointerOrReference); + // Same again with * instead of &: + Tokens = annotate("#define lambda [](const decltype(x) *ptr) {}"); + EXPECT_EQ(Tokens.size(), 17u) << Tokens; + EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen); + EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference); +} + +} // namespace +} // namespace format +} // namespace clang