diff --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h
--- a/clang/lib/Format/FormatTokenSource.h
+++ b/clang/lib/Format/FormatTokenSource.h
@@ -1,4 +1,3 @@
-
 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -8,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the \c TokenSource interface, which provides a token
+/// This file defines the \c FormatTokenSource interface, which provides a token
 /// stream as well as the ability to manipulate the token stream.
 ///
 //===----------------------------------------------------------------------===//
@@ -18,12 +17,18 @@
 
 #include "FormatToken.h"
 #include "UnwrappedLineParser.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "format-token-source"
 
 namespace clang {
 namespace format {
 
+// Navigate a token stream.
+//
+// Enables traversal of a token stream, resetting the position in a token
+// stream, as well as inserting new tokens.
 class FormatTokenSource {
 public:
   virtual ~FormatTokenSource() {}
@@ -33,6 +38,9 @@
 
   // Returns the token preceding the token returned by the last call to
   // getNextToken() in the token stream, or nullptr if no such token exists.
+  //
+  // Must not be called while positioned at the first token of a sequence that
+  // was inserted via insertTokens().
   virtual FormatToken *getPreviousToken() = 0;
 
   // Returns the token that would be returned by the next call to
@@ -45,14 +53,31 @@
   virtual bool isEOF() = 0;
 
   // Gets the current position in the token stream, to be used by setPosition().
+  //
+  // Note that the value of the position is not meaningful, and specifically
+  // should not be used to get relative token positions.
   virtual unsigned getPosition() = 0;
 
   // Resets the token stream to the state it was in when getPosition() returned
   // Position, and return the token at that position in the stream.
   virtual FormatToken *setPosition(unsigned Position) = 0;
+
+  // Insert the given tokens before the current position.
+  // Returns the first token in \c Tokens.
+  // The next returned token will be the second token in \c Tokens.
+  // Requires the last token in Tokens to be EOF; once the EOF token is reached,
+  // the stream continues with the token that was current before the insertion.
+  //
+  // For example, given the token sequence 'a1 a2':
+  // getNextToken() -> a1
+  // insertTokens('b1 b2') -> b1
+  // getNextToken() -> b2
+  // getNextToken() -> a1
+  // getNextToken() -> a2
+  virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
 };
 
-class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
+class IndexedTokenSource : public FormatTokenSource {
 public:
   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
       : Tokens(Tokens), Position(-1) {}
@@ -65,7 +90,7 @@
       });
       return Tokens[Position];
     }
-    ++Position;
+    Position = successor(Position);
     LLVM_DEBUG({
       llvm::dbgs() << "Next ";
       dbgToken(Position);
@@ -74,16 +99,17 @@
   }
 
   FormatToken *getPreviousToken() override {
+    assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof));
     return Position > 0 ? Tokens[Position - 1] : nullptr;
   }
 
   FormatToken *peekNextToken(bool SkipComment = false) override {
     if (isEOF())
       return Tokens[Position];
-    int Next = Position + 1;
+    int Next = successor(Position);
     if (SkipComment)
       while (Tokens[Next]->is(tok::comment))
-        ++Next;
+        Next = successor(Next);
     LLVM_DEBUG({
       llvm::dbgs() << "Peeking ";
       dbgToken(Next);
@@ -107,9 +133,43 @@
     return Tokens[Position];
   }
 
+  FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
+    assert(Position != -1);
+    assert((*New.rbegin())->Tok.is(tok::eof));
+    int Next = Tokens.size();
+    Tokens.append(New.begin(), New.end());
+    LLVM_DEBUG({
+      llvm::dbgs() << "Inserting:\n";
+      for (int I = Next, E = Tokens.size(); I != E; ++I)
+        dbgToken(I, " ");
+      llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
+                   << Position << "\n";
+    });
+    // Once the inserted EOF is reached, continue at the current position.
+    Jumps[Tokens.size() - 1] = Position;
+    Position = Next;
+    LLVM_DEBUG({
+      llvm::dbgs() << "At inserted token ";
+      dbgToken(Position);
+    });
+    return Tokens[Position];
+  }
+
   void reset() { Position = -1; }
 
 private:
+  // Returns the position following Current, taking the jump registered for
+  // that position (if any) instead of stepping onto it.
+  int successor(int Current) const {
+    int Next = Current + 1;
+    auto it = Jumps.find(Next);
+    if (it != Jumps.end()) {
+      Next = it->second;
+      assert(Jumps.find(Next) == Jumps.end());
+    }
+    return Next;
+  }
+
   void dbgToken(int Position, llvm::StringRef Indent = "") {
     FormatToken *Tok = Tokens[Position];
     llvm::dbgs() << Indent << "[" << Position
@@ -117,8 +177,12 @@
                  << ", Macro: " << !!Tok->MacroCtx << "\n";
   }
 
-  ArrayRef<FormatToken *> Tokens;
+  SmallVector<FormatToken *> Tokens;
   int Position;
+
+  // Maps from position a to position b, so that when we reach a, the token
+  // stream continues at position b instead.
+  llvm::DenseMap<int, int> Jumps;
 };
 
 class ScopedMacroState : public FormatTokenSource {
@@ -175,6 +239,11 @@
     return Token;
   }
 
+  FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
+    llvm_unreachable("Cannot insert tokens while parsing a macro.");
+    return nullptr;
+  }
+
 private:
   bool eof() {
     return Token && Token->HasUnescapedNewline &&
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -280,9 +280,6 @@
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;
 
-  // FIXME: This is a temporary measure until we have reworked the ownership
-  // of the format tokens. The goal is to have the actual tokens created and
-  // owned outside of and handed into the UnwrappedLineParser.
   ArrayRef<FormatToken *> AllTokens;
 
   // Keeps a stack of the states of nested control statements (true if the
diff --git a/clang/unittests/Format/FormatTokenSourceTest.cpp b/clang/unittests/Format/FormatTokenSourceTest.cpp
--- a/clang/unittests/Format/FormatTokenSourceTest.cpp
+++ b/clang/unittests/Format/FormatTokenSourceTest.cpp
@@ -28,12 +28,17 @@
 #define EXPECT_TOKEN_KIND(FormatTok, Kind)                                     \
   do {                                                                         \
     FormatToken *Tok = FormatTok;                                              \
-    EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok);                           \
+    EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok;                               \
+  } while (false);
+#define EXPECT_TOKEN_ID(FormatTok, Name)                                       \
+  do {                                                                         \
+    FormatToken *Tok = FormatTok;                                              \
+    EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok;                    \
+    EXPECT_EQ(Tok->TokenText, Name) << *Tok;                                   \
   } while (false);
 
 TEST_F(IndexedTokenSourceTest, EmptyInput) {
-  TokenList Tokens = lex("");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex(""));
   EXPECT_FALSE(Source.isEOF());
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
   EXPECT_TRUE(Source.isEOF());
@@ -46,8 +51,7 @@
 }
 
 TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
-  TokenList Tokens = lex("int a;");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex("int a;"));
   EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int);
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int);
   EXPECT_EQ(Source.getPreviousToken(), nullptr);
@@ -60,11 +64,12 @@
   EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
 }
 
 TEST_F(IndexedTokenSourceTest, ResetPosition) {
-  TokenList Tokens = lex("int a;");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex("int a;"));
   Source.getNextToken();
   unsigned Position = Source.getPosition();
   Source.getNextToken();
@@ -73,6 +78,50 @@
   EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int);
 }
 
+TEST_F(IndexedTokenSourceTest, InsertTokens) {
+  IndexedTokenSource Source(lex("A1 A2"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A2");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  // A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+  // B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
+  // C1 B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
+  // D1 C1 B1 A1
+  EXPECT_TOKEN_ID(Source.getNextToken(), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
 } // namespace
 } // namespace format
 } // namespace clang