diff --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h
--- a/clang/lib/Format/FormatTokenSource.h
+++ b/clang/lib/Format/FormatTokenSource.h
@@ -1,4 +1,3 @@
-
 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -8,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the \c TokenSource interface, which provides a token
+/// This file defines the \c FormatTokenSource interface, which provides a token
 /// stream as well as the ability to manipulate the token stream.
 ///
 //===----------------------------------------------------------------------===//
@@ -24,6 +23,10 @@
 namespace clang {
 namespace format {
 
+// Navigate a token stream.
+//
+// Enables traversal of a token stream, resetting the position in a token
+// stream, as well as inserting new tokens.
 class FormatTokenSource {
 public:
   virtual ~FormatTokenSource() {}
@@ -50,11 +53,25 @@
   // Resets the token stream to the state it was in when getPosition() returned
   // Position, and return the token at that position in the stream.
   virtual FormatToken *setPosition(unsigned Position) = 0;
+
+  // Insert the given tokens before the current position.
+  // Returns the first token in \c Tokens.
+  // The next returned token will be the second token in \c Tokens.
+  // Requires the last token in Tokens to be EOF; once the EOF token is reached,
+  // the next token will be the last token returned by getNextToken().
+  //
+  // For example, given the token sequence 'a1 a2':
+  //   getNextToken() -> a1
+  //   insertTokens('b1 b2') -> b1
+  //   getNextToken() -> b2
+  //   getNextToken() -> a1
+  //   getNextToken() -> a2
+  virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
 };
 
 class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
 public:
-  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
+  IndexedTokenSource(SmallVectorImpl<FormatToken *> &Tokens)
       : Tokens(Tokens), Position(-1) {}
 
   FormatToken *getNextToken() override {
@@ -65,7 +82,7 @@
       });
       return Tokens[Position];
     }
-    ++Position;
+    Position = next(Position);
     LLVM_DEBUG({
       llvm::dbgs() << "Next ";
       dbgToken(Position);
@@ -80,10 +97,10 @@
   FormatToken *peekNextToken(bool SkipComment = false) override {
     if (isEOF())
       return Tokens[Position];
-    int Next = Position + 1;
+    int Next = next(Position);
     if (SkipComment)
       while (Tokens[Next]->is(tok::comment))
-        ++Next;
+        Next = next(Next);
     LLVM_DEBUG({
       llvm::dbgs() << "Peeking ";
       dbgToken(Next);
@@ -107,9 +124,42 @@
     return Tokens[Position];
   }
 
+  FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
+    assert(Position != -1);
+    assert(!New.empty() && New.back()->Tok.is(tok::eof));
+    LLVM_DEBUG(llvm::dbgs() << "Inserting:\n");
+    int Next = Tokens.size();
+    Tokens.append(New.begin(), New.end());
+    LLVM_DEBUG({
+      for (int I = Next, E = Tokens.size(); I != E; ++I)
+        dbgToken(I, " ");
+      llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
+                   << Position << "\n";
+    });
+    Jumps[Tokens.size() - 1] = Position;
+    Position = Next;
+    LLVM_DEBUG({
+      llvm::dbgs() << "At inserted token ";
+      dbgToken(Position);
+    });
+    return Tokens[Position];
+  }
+
   void reset() { Position = -1; }
 
 private:
+  // Returns the position that follows Current, honoring any jump registered
+  // for Current + 1.
+  int next(int Current) {
+    int Next = Current + 1;
+    auto It = Jumps.find(Next);
+    if (It != Jumps.end()) {
+      Next = It->second;
+      assert(Jumps.find(Next) == Jumps.end());
+    }
+    return Next;
+  }
+
   void dbgToken(int Position, llvm::StringRef Indent = "") {
     FormatToken *Tok = Tokens[Position];
     llvm::dbgs() << Indent << "[" << Position
@@ -117,8 +167,12 @@
                  << ", Macro: " << !!Tok->MacroCtx << "\n";
   }
 
-  ArrayRef<FormatToken *> Tokens;
+  SmallVectorImpl<FormatToken *> &Tokens;
   int Position;
+
+  // Maps from position a to position b, so that when we reach a, the token
+  // stream continues at position b instead.
+  std::map<int, int> Jumps;
 };
 
 class ScopedMacroState : public FormatTokenSource {
@@ -175,6 +229,14 @@
     return Token;
   }
 
+  // Inserting tokens is not supported while parsing a macro; calling this is
+  // a programming error.
+  FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
+    assert(false && "Cannot insert tokens while parsing a macro.");
+    // Keep the function well-defined when assertions are compiled out.
+    return nullptr;
+  }
+
 private:
   bool eof() {
     return Token && Token->HasUnescapedNewline &&
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -89,7 +89,8 @@
 public:
   UnwrappedLineParser(const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
-                      unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
+                      unsigned FirstStartColumn,
+                      SmallVectorImpl<FormatToken *> &Tokens,
                       UnwrappedLineConsumer &Callback);
 
   void parse();
@@ -283,7 +284,9 @@
   // FIXME: This is a temporary measure until we have reworked the ownership
   // of the format tokens. The goal is to have the actual tokens created and
   // owned outside of and handed into the UnwrappedLineParser.
-  ArrayRef<FormatToken *> AllTokens;
+  // FIXME: The above fixme doesn't work if we need to create tokens while
+  // parsing.
+  SmallVectorImpl<FormatToken *> &AllTokens;
 
   // Keeps a stack of the states of nested control statements (true if the
   // statement contains more than some predefined number of nested statements).
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -146,7 +146,7 @@
 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                          const AdditionalKeywords &Keywords,
                                          unsigned FirstStartColumn,
-                                         ArrayRef<FormatToken *> Tokens,
+                                         SmallVectorImpl<FormatToken *> &Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
diff --git a/clang/unittests/Format/FormatTokenSourceTest.cpp b/clang/unittests/Format/FormatTokenSourceTest.cpp
--- a/clang/unittests/Format/FormatTokenSourceTest.cpp
+++ b/clang/unittests/Format/FormatTokenSourceTest.cpp
@@ -30,6 +30,12 @@
     FormatToken *Tok = FormatTok;                                              \
     EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok);                           \
   } while (false);
+#define EXPECT_TOKEN_ID(FormatTok, Name)                                       \
+  do {                                                                         \
+    FormatToken *Tok = FormatTok;                                              \
+    EXPECT_EQ((Tok)->Tok.getKind(), tok::identifier) << *(Tok);                \
+    EXPECT_EQ((Tok)->TokenText, Name) << *(Tok);                               \
+  } while (false);
 
 TEST_F(IndexedTokenSourceTest, EmptyInput) {
   TokenList Tokens = lex("");
@@ -60,6 +66,8 @@
   EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
 }
 
 TEST_F(IndexedTokenSourceTest, ResetPosition) {
@@ -73,6 +81,62 @@
   EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int);
 }
 
+TEST_F(IndexedTokenSourceTest, InsertTokens) {
+  TokenList TokensA = lex("A1 A2");
+  TokenList TokensB = lex("B1 B2");
+  IndexedTokenSource Source(TokensA);
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A2");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) {
+  TokenList TokensA = lex("A1");
+  TokenList TokensB = lex("B1 B2");
+  IndexedTokenSource Source(TokensA);
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) {
+  TokenList TokensA = lex("A1");
+  TokenList TokensB = lex("B1");
+  TokenList TokensC = lex("C1");
+  TokenList TokensD = lex("D1");
+  IndexedTokenSource Source(TokensA);
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  // A1
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1");
+  // B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensC), "C1");
+  // C1 B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensD), "D1");
+  // D1 C1 B1 A1
+  EXPECT_TOKEN_ID(Source.getNextToken(), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) {
+  TokenList TokensA = lex("A1");
+  TokenList TokensB = lex("B1");
+  TokenList TokensC = lex("C1");
+  TokenList TokensD = lex("D1");
+  IndexedTokenSource Source(TokensA);
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensB), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensC), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(TokensD), "D1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
 } // namespace
 } // namespace format
 } // namespace clang
diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h
--- a/clang/unittests/Format/TestLexer.h
+++ b/clang/unittests/Format/TestLexer.h
@@ -71,7 +71,8 @@
 
   TokenList annotate(llvm::StringRef Code) {
     FormatTokenLexer Lex = getNewLexer(Code);
-    auto Tokens = Lex.lex();
+    auto Toks = Lex.lex();
+    SmallVector<FormatToken *> Tokens(Toks.begin(), Toks.end());
     UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
     Parser.parse();
     TokenAnnotator Annotator(Style, Lex.getKeywords());