diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -28,8 +28,11 @@ #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include +#include namespace clang { namespace syntax { @@ -204,6 +207,12 @@ template struct ElementAndDelimiter { Element *element; Leaf *delimiter; + bool operator==(const ElementAndDelimiter &Other) const { + return element == Other.element && delimiter == Other.delimiter; + } + bool operator!=(const ElementAndDelimiter &Other) const { + return !(*this == Other); + } }; enum class TerminationKind { @@ -214,10 +223,12 @@ using Tree::Tree; static bool classof(const Node *N); - /// Returns the elements and corresponding delimiters. Missing elements - /// and delimiters are represented as null pointers. + + /// Iterator through elements and their corresponding delimiters. Missing + /// elements and delimiters are represented as null pointers. Below we give + /// examples of what this iteration would looks like. /// - /// For example, in a separated list: + /// In a separated list: /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)] /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)] /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)] @@ -228,6 +239,182 @@ /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )] /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )] /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)] + /// + /// NOTE: This iterator *should* be a forward iterator, however until C++17 we + /// can only have reference proxies for input iterators, and + /// `ElementAndDelimiter` acts as one. + template + class ElementAndDelimiterIterator + : public std::iterator, ptrdiff_t, + const ElementAndDelimiter *, + ElementAndDelimiter> { + public: + ElementAndDelimiterIterator &operator++() { + assert(Kind != IteratorKind::End && "Incrementing end iterator."); + if (Kind == IteratorKind::BeforeBegin) { + if (auto *Begin = Parent->getFirstChild()) { + Kind = IteratorKind::NotSentinel; + if (isElement(Begin)) + Current = getWithDelimiter(cast(Begin)); + else + Current = {nullptr, cast(Begin)}; + } else + Kind = IteratorKind::End; + } else { + auto Next = [](ElementAndDelimiter ED) + -> llvm::Optional> { + auto *N = ED.element ? ED.element : ED.delimiter; + if (!N) + return None; + if (isElement(N)) + return getElementAndDelimiterAfterElement(cast(N)); + return getElementAndDelimiterAfterDelimiter(cast(N)); + }(Current); + if (Next.hasValue()) + Current = Next.getValue(); + else + Kind = IteratorKind::End; + } + return *this; + } + ElementAndDelimiterIterator operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + + ElementAndDelimiter operator*() const { + assert(Kind == IteratorKind::NotSentinel && + "Dereferencing sentinel iterator."); + return Current; + } + + bool operator==(const ElementAndDelimiterIterator &Other) const { + return (Parent == Other.Parent && Kind == Other.Kind && + (Kind != IteratorKind::NotSentinel || Current == Other.Current)); + } + + bool operator!=(const ElementAndDelimiterIterator &Other) const { + return !(*this == Other); + } + + private: + enum class IteratorKind { BeforeBegin, End, NotSentinel }; + + List *Parent; + IteratorKind Kind; + // If `Kind != NotSentinel`, then `Current` has trash. + ElementAndDelimiter Current; + + public: + List *getParent() { return Parent; } + const List *getParent() const { return Parent; } + + bool isEnd() const { return Kind == IteratorKind::End; }; + bool isBeforeBegin() const { return Kind == IteratorKind::BeforeBegin; }; + bool isSentinel() const { return Kind != IteratorKind::NotSentinel; }; + + static ElementAndDelimiterIterator makeEnd(List *Parent) { + assert(Parent); + ElementAndDelimiterIterator It; + It.Parent = Parent; + It.Kind = IteratorKind::End; + return It; + } + + static ElementAndDelimiterIterator + makeBeforeBegin(List *Parent) { + assert(Parent); + ElementAndDelimiterIterator It; + It.Parent = Parent; + It.Kind = IteratorKind::BeforeBegin; + return It; + } + + private: + static ElementAndDelimiter + getWithDelimiter(ElementType *Element) { + assert(Element && isElement(Element)); + auto *Next = Element->getNextSibling(); + if (!Next) + return {Element, nullptr}; + + if (isElement(Next)) + return {Element, nullptr}; + if (isDelimiter(Next)) + return {Element, cast(Next)}; + + llvm_unreachable( + "A list can have only elements and delimiters as children."); + } + + static llvm::Optional> + getElementAndDelimiterAfterDelimiter(Leaf *Delimiter) { + assert(Delimiter && isDelimiter(Delimiter)); + auto *L = cast(Delimiter->getParent()); + auto *Next = Delimiter->getNextSibling(); + if (!Next) { + switch (L->getTerminationKind()) { + // List is separated and ends with delimiter + // => missing last ElementAndDelimiter. + case TerminationKind::Separated: + return llvm::Optional>( + {nullptr, nullptr}); + case TerminationKind::Terminated: + case TerminationKind::MaybeTerminated: + return None; + } + } + if (isElement(Next)) + return getWithDelimiter(cast(Next)); + + // Consecutive Delimiters => missing Element + if (isDelimiter(Next)) + return llvm::Optional>( + {nullptr, cast(Next)}); + llvm_unreachable( + "A list can have only elements and delimiters as children."); + } + + static llvm::Optional> + getElementAndDelimiterAfterElement(ElementType *Element) { + assert(Element && isElement(Element)); + auto *Next = Element->getNextSibling(); + if (!Next) + // This was the last element of the list. + return None; + + if (isElement(Next)) + // We have something of the form "a b ..." => 'b' starts the next + // `ElementAndDelimiter`. + return getWithDelimiter(cast(Next)); + if (isDelimiter(Next)) + // We have something of the form "a , ..." => whatever comes after the + // comma starts the next `ElementAndDelimiter`. + return getElementAndDelimiterAfterDelimiter(cast(Next)); + + llvm_unreachable( + "A list can have only elements and delimiters as children."); + } + }; + + ElementAndDelimiterIterator getBeforeBegin(); + ElementAndDelimiterIterator getBeginNode(); + ElementAndDelimiterIterator getEndNode(); + + template + using ElementsAndDelimitersRange = + llvm::iterator_range>; + + ElementsAndDelimitersRange getNodeRange() { + return ElementsAndDelimitersRange(getBeginNode(), getEndNode()); + } + + // Returns the elements of the list, and their corresponding delimiters. + // Missing elements are represented as null pointers. Iterating through + // `getElementsAsNodesAndDelimiters()` is equivalent to iterating through + // `getRange()` std::vector> getElementsAsNodesAndDelimiters(); /// Returns the elements of the list. Missing elements are represented @@ -251,6 +438,10 @@ /// This list may be empty when the source code has errors even if /// canBeEmpty() returns false. bool canBeEmpty() const; + +private: + static bool isElement(Node *N); + static bool isDelimiter(Node *N); }; } // namespace syntax diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -230,91 +230,77 @@ // vector std::vector syntax::NestedNameSpecifier::getSpecifiers() { - auto SpecifiersAsNodes = getElementsAsNodes(); std::vector Children; - for (const auto &Element : SpecifiersAsNodes) { - Children.push_back(llvm::cast(Element)); - } + for (auto C : getNodeRange()) + Children.push_back(cast(C.element)); + return Children; } std::vector> syntax::NestedNameSpecifier::getSpecifiersAndDoubleColons() { - auto SpecifiersAsNodesAndDoubleColons = getElementsAsNodesAndDelimiters(); std::vector> Children; - for (const auto &SpecifierAndDoubleColon : SpecifiersAsNodesAndDoubleColons) { - Children.push_back( - {llvm::cast(SpecifierAndDoubleColon.element), - SpecifierAndDoubleColon.delimiter}); - } + for (auto C : getNodeRange()) + Children.push_back({cast(C.element), C.delimiter}); + return Children; } std::vector syntax::CallArguments::getArguments() { - auto ArgumentsAsNodes = getElementsAsNodes(); std::vector Children; - for (const auto &ArgumentAsNode : ArgumentsAsNodes) { - Children.push_back(llvm::cast(ArgumentAsNode)); - } + for (auto C : getNodeRange()) + Children.push_back(cast(C.element)); + return Children; } std::vector> syntax::CallArguments::getArgumentsAndCommas() { - auto ArgumentsAsNodesAndCommas = getElementsAsNodesAndDelimiters(); std::vector> Children; - for (const auto &ArgumentAsNodeAndComma : ArgumentsAsNodesAndCommas) { - Children.push_back( - {llvm::cast(ArgumentAsNodeAndComma.element), - ArgumentAsNodeAndComma.delimiter}); - } + for (auto C : getNodeRange()) + Children.push_back({cast(C.element), C.delimiter}); + return Children; } std::vector syntax::ParameterDeclarationList::getParameterDeclarations() { - auto ParametersAsNodes = getElementsAsNodes(); std::vector Children; - for (const auto &ParameterAsNode : ParametersAsNodes) { - Children.push_back(llvm::cast(ParameterAsNode)); - } + for (auto C : getNodeRange()) + Children.push_back(cast(C.element)); + return Children; } std::vector> syntax::ParameterDeclarationList::getParametersAndCommas() { - auto ParametersAsNodesAndCommas = getElementsAsNodesAndDelimiters(); std::vector> Children; - for (const auto &ParameterAsNodeAndComma : ParametersAsNodesAndCommas) { + for (auto C : getNodeRange()) Children.push_back( - {llvm::cast(ParameterAsNodeAndComma.element), - ParameterAsNodeAndComma.delimiter}); - } + {cast(C.element), C.delimiter}); + return Children; } std::vector syntax::DeclaratorList::getDeclarators() { - auto DeclaratorsAsNodes = getElementsAsNodes(); std::vector Children; - for (const auto &DeclaratorAsNode : DeclaratorsAsNodes) { - Children.push_back(llvm::cast(DeclaratorAsNode)); - } + for (auto C : getNodeRange()) + Children.push_back(cast(C.element)); + return Children; } std::vector> syntax::DeclaratorList::getDeclaratorsAndCommas() { - auto DeclaratorsAsNodesAndCommas = getElementsAsNodesAndDelimiters(); std::vector> Children; - for (const auto &DeclaratorAsNodeAndComma : DeclaratorsAsNodesAndCommas) { + for (auto C : getNodeRange()) Children.push_back( - {llvm::cast(DeclaratorAsNodeAndComma.element), - DeclaratorAsNodeAndComma.delimiter}); - } + {cast(C.element), C.delimiter}); + return Children; } diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -311,91 +311,40 @@ } } -std::vector> -syntax::List::getElementsAsNodesAndDelimiters() { - if (!getFirstChild()) - return {}; - - std::vector> Children; - syntax::Node *ElementWithoutDelimiter = nullptr; - for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { - switch (C->getRole()) { - case syntax::NodeRole::ListElement: { - if (ElementWithoutDelimiter) { - Children.push_back({ElementWithoutDelimiter, nullptr}); - } - ElementWithoutDelimiter = C; - break; - } - case syntax::NodeRole::ListDelimiter: { - Children.push_back({ElementWithoutDelimiter, cast(C)}); - ElementWithoutDelimiter = nullptr; - break; - } - default: - llvm_unreachable( - "A list can have only elements and delimiters as children."); - } - } +bool syntax::List::isElement(syntax::Node *N) { + return N && N->getRole() == NodeRole::ListElement; +} - switch (getTerminationKind()) { - case syntax::List::TerminationKind::Separated: { - Children.push_back({ElementWithoutDelimiter, nullptr}); - break; - } - case syntax::List::TerminationKind::Terminated: - case syntax::List::TerminationKind::MaybeTerminated: { - if (ElementWithoutDelimiter) { - Children.push_back({ElementWithoutDelimiter, nullptr}); - } - break; - } - } +bool syntax::List::isDelimiter(syntax::Node *N) { + return N && N->getRole() == NodeRole::ListDelimiter; +} - return Children; +syntax::List::ElementAndDelimiterIterator +syntax::List::getBeforeBegin() { + return ElementAndDelimiterIterator::makeBeforeBegin(this); } -// Almost the same implementation of `getElementsAsNodesAndDelimiters` but -// ignoring delimiters -std::vector syntax::List::getElementsAsNodes() { - if (!getFirstChild()) - return {}; +syntax::List::ElementAndDelimiterIterator +syntax::List::getBeginNode() { + return std::next(ElementAndDelimiterIterator::makeBeforeBegin(this)); +} - std::vector Children; - syntax::Node *ElementWithoutDelimiter = nullptr; - for (auto *C = getFirstChild(); C; C = C->getNextSibling()) { - switch (C->getRole()) { - case syntax::NodeRole::ListElement: { - if (ElementWithoutDelimiter) { - Children.push_back(ElementWithoutDelimiter); - } - ElementWithoutDelimiter = C; - break; - } - case syntax::NodeRole::ListDelimiter: { - Children.push_back(ElementWithoutDelimiter); - ElementWithoutDelimiter = nullptr; - break; - } - default: - llvm_unreachable("A list has only elements or delimiters."); - } - } +syntax::List::ElementAndDelimiterIterator +syntax::List::getEndNode() { + return ElementAndDelimiterIterator::makeEnd(this); +} - switch (getTerminationKind()) { - case syntax::List::TerminationKind::Separated: { - Children.push_back(ElementWithoutDelimiter); - break; - } - case syntax::List::TerminationKind::Terminated: - case syntax::List::TerminationKind::MaybeTerminated: { - if (ElementWithoutDelimiter) { - Children.push_back(ElementWithoutDelimiter); - } - break; - } - } +std::vector> +syntax::List::getElementsAsNodesAndDelimiters() { + return std::vector>( + getBeginNode(), getEndNode()); +} +std::vector syntax::List::getElementsAsNodes() { + std::vector Children; + std::transform( + getBeginNode(), getEndNode(), std::back_inserter(Children), + [](ElementAndDelimiter ED) { return ED.element; }); return Children; } diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -11,6 +11,7 @@ #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Nodes.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" using namespace clang; @@ -125,7 +126,7 @@ } class ListTest : public SyntaxTreeTest { -private: +protected: std::string dumpQuotedTokensOrNull(const Node *N) { return N ? "'" + StringRef(N->dumpTokens(Arena->getSourceManager())) @@ -135,7 +136,15 @@ : "null"; } -protected: + std::string + dumpElementAndDelimiter(const List::ElementAndDelimiter ED) { + std::string Storage; + llvm::raw_string_ostream OS(Storage); + OS << "(" << dumpQuotedTokensOrNull(ED.element) << ", " + << dumpQuotedTokensOrNull(ED.delimiter) << ")"; + return OS.str(); + } + std::string dumpElementsAndDelimiters(ArrayRef> EDs) { std::string Storage; @@ -145,8 +154,7 @@ llvm::interleaveComma( EDs, OS, [&OS, this](const List::ElementAndDelimiter &ED) { - OS << "(" << dumpQuotedTokensOrNull(ED.element) << ", " - << dumpQuotedTokensOrNull(ED.delimiter) << ")"; + OS << dumpElementAndDelimiter(ED); }); OS << "]"; @@ -351,4 +359,40 @@ EXPECT_EQ(dumpNodes(List->getElementsAsNodes()), "['a', 'b', 'c']"); } +TEST_P(ListTest, List_Iterator_StableDereference) { + if (!GetParam().isCXX()) { + return; + } + buildTree("", GetParam()); + + // "a:: b:: c" + auto *List = dyn_cast(syntax::createTree( + *Arena, + { + {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, + }, + NodeKind::NestedNameSpecifier)); + + auto It = List->getBeginNode(); + const auto &First = *It; + auto ItAssign = It; + + EXPECT_EQ(dumpElementAndDelimiter(First), "('a', '::')"); + ++It; + EXPECT_EQ(dumpElementAndDelimiter(First), "('a', '::')"); + + EXPECT_EQ(*ItAssign, First); + + auto It2 = std::next(List->getBeforeBegin(), 2); + EXPECT_EQ(It, It2); + EXPECT_EQ(*It, *It2); + + ++It; + ++It; + EXPECT_EQ(It, List->getEndNode()); +} } // namespace