diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -22,6 +22,7 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" +#include "llvm/Support/YamlSerializer.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -123,11 +124,6 @@ // static void bitset(IO &io, T &value); }; -/// Describe which type of quotes should be used when quoting is necessary. -/// Some non-printable characters need to be double-quoted, while some others -/// are fine with simple-quoting, and some don't need any quoting. -enum class QuotingType { None, Single, Double }; - /// This class should be specialized by type that requires custom conversion /// to/from a yaml scalar. For example: /// @@ -667,82 +663,6 @@ S.equals("false") || S.equals("False") || S.equals("FALSE"); } -// 5.1. Character Set -// The allowed character range explicitly excludes the C0 control block #x0-#x1F -// (except for TAB #x9, LF #xA, and CR #xD which are allowed), DEL #x7F, the C1 -// control block #x80-#x9F (except for NEL #x85 which is allowed), the surrogate -// block #xD800-#xDFFF, #xFFFE, and #xFFFF. -inline QuotingType needsQuotes(StringRef S) { - if (S.empty()) - return QuotingType::Single; - - QuotingType MaxQuotingNeeded = QuotingType::None; - if (isSpace(static_cast(S.front())) || - isSpace(static_cast(S.back()))) - MaxQuotingNeeded = QuotingType::Single; - if (isNull(S)) - MaxQuotingNeeded = QuotingType::Single; - if (isBool(S)) - MaxQuotingNeeded = QuotingType::Single; - if (isNumeric(S)) - MaxQuotingNeeded = QuotingType::Single; - - // 7.3.3 Plain Style - // Plain scalars must not begin with most indicators, as this would cause - // ambiguity with other YAML constructs. 
- if (std::strchr(R"(-?:\,[]{}#&*!|>'"%@`)", S[0]) != nullptr) - MaxQuotingNeeded = QuotingType::Single; - - for (unsigned char C : S) { - // Alphanum is safe. - if (isAlnum(C)) - continue; - - switch (C) { - // Safe scalar characters. - case '_': - case '-': - case '^': - case '.': - case ',': - case ' ': - // TAB (0x9) is allowed in unquoted strings. - case 0x9: - continue; - // LF(0xA) and CR(0xD) may delimit values and so require at least single - // quotes. LLVM YAML parser cannot handle single quoted multiline so use - // double quoting to produce valid YAML. - case 0xA: - case 0xD: - return QuotingType::Double; - // DEL (0x7F) are excluded from the allowed character range. - case 0x7F: - return QuotingType::Double; - // Forward slash is allowed to be unquoted, but we quote it anyway. We have - // many tests that use FileCheck against YAML output, and this output often - // contains paths. If we quote backslashes but not forward slashes then - // paths will come out either quoted or unquoted depending on which platform - // the test is run on, making FileCheck comparisons difficult. - case '/': - default: { - // C0 control block (0x0 - 0x1F) is excluded from the allowed character - // range. - if (C <= 0x1F) - return QuotingType::Double; - - // Always double quote UTF-8. - if ((C & 0x80) != 0) - return QuotingType::Double; - - // The character is not safe, at least simple quoting needed. 
- MaxQuotingNeeded = QuotingType::Single; - } - } - } - - return MaxQuotingNeeded; -} - template struct missingTraits : public std::integral_constant StateStack; - int Column = 0; - int ColumnAtFlowStart = 0; - int ColumnAtMapFlowStart = 0; - bool NeedBitValueComma = false; - bool NeedFlowSequenceComma = false; + OutputStream Out; bool EnumerationMatchFound = false; bool WriteDefaultValues = false; - StringRef Padding; - StringRef PaddingBeforeContainer; }; template diff --git a/llvm/include/llvm/Support/YamlSerializer.h b/llvm/include/llvm/Support/YamlSerializer.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Support/YamlSerializer.h @@ -0,0 +1,271 @@ +//===- YamlSerializer.h - Simple YAML serializer ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_YAMLSERIALIZER_H +#define LLVM_SUPPORT_YAMLSERIALIZER_H + +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +namespace llvm { +class raw_ostream; + +namespace yaml { + +/// Describe which type of quotes should be used when quoting is necessary. +/// Some non-printable characters need to be double-quoted, while some others +/// are fine with single-quoting, and some don't need any quoting. +enum class QuotingType { None, Single, Double }; + +// 5.1. Character Set +// The allowed character range explicitly excludes the C0 control block #x0-#x1F +// (except for TAB #x9, LF #xA, and CR #xD which are allowed), DEL #x7F, the C1 +// control block #x80-#x9F (except for NEL #x85 which is allowed), the surrogate +// block #xD800-#xDFFF, #xFFFE, and #xFFFF.
+QuotingType needsQuotes(StringRef S); + +class OutputStream { +public: + OutputStream(raw_ostream &Underlying, int WrapColumn = 70) + : Out(Underlying), WrapColumn(WrapColumn) { + // Silence any warnings about unused private variables + (void)IsDocEmpty; + (void)CanEmitValues; + } + ~OutputStream() { + if (InDocument) + endDocuments(); + } + struct LLVM_NODISCARD MapBuilder { + private: + friend class OutputStream; + MapBuilder(OutputStream &Out) : Out(&Out) {} + + public: + ~MapBuilder() { + if (Out) + Out->exitMap(); + } + MapBuilder &emit(StringRef Key, + llvm::function_ref BuildValue) { + assert(Out && "Map already disposed"); + Out->emitMapKey(Key); + BuildValue(*Out); + Out->advanceMap(); + return *this; + } + + MapBuilder &emit(StringRef Key, StringRef Value) { + return emit(Key, Value, needsQuotes(Value)); + } + + MapBuilder &emit(StringRef Key, StringRef Value, + QuotingType MustQuoteValue) { + assert(Out && "Map already disposed"); + Out->emitMapKey(Key); + Out->outputScalar(Value, MustQuoteValue); + Out->advanceMap(); + return *this; + } + + void close() { + assert(Out && "Map already disposed"); + Out->exitMap(); + Out = nullptr; + } + + private: + OutputStream *Out; + }; + + struct LLVM_NODISCARD SequenceBuilder { + private: + friend class OutputStream; + SequenceBuilder(OutputStream &Out) : Out(&Out) {} + + public: + ~SequenceBuilder() { + if (Out) + Out->exitSequence(); + } + SequenceBuilder &emit(llvm::function_ref EmitValue) { + assert(Out && "Sequence already disposed"); + Out->preEmitSequence(); + EmitValue(*Out); + Out->advanceSequence(); + return *this; + } + + SequenceBuilder &emit(StringRef Item) { + return emit(Item, needsQuotes(Item)); + } + + SequenceBuilder &emit(StringRef Item, QuotingType MustQuote) { + assert(Out && "Sequence already disposed"); + Out->preEmitSequence(); + Out->outputScalar(Item, MustQuote); + Out->advanceSequence(); + return *this; + } + + void close() { + assert(Out && "Sequence already disposed"); + 
Out->exitSequence(); + Out = nullptr; + } + + private: + OutputStream *Out; + }; + + struct LLVM_NODISCARD BitsetBuilder { + private: + friend class OutputStream; + BitsetBuilder(OutputStream &Out) : Out(&Out) {} + + public: + ~BitsetBuilder() { + if (Out) + Out->exitBitset(); + } + BitsetBuilder &emit(StringRef Str) { + assert(Out && "Bitset already disposed"); + Out->emitBitset(Str); + return *this; + } + + void close() { + assert(Out && "Bitset already disposed"); + Out->exitBitset(); + Out = nullptr; + } + + private: + OutputStream *Out; + }; + + MapBuilder buildMap(bool Flow) { + enterMap(Flow); + return MapBuilder(*this); + } + + SequenceBuilder buildSequence(bool Flow) { + enterSequence(Flow); + return SequenceBuilder(*this); + } + + BitsetBuilder buildBitset() { + enterBitset(); + return BitsetBuilder(*this); + } + + void newDocument(bool First); + void endDocuments(); + + void enterMap(bool IsFlow); + void emitMapKey(StringRef Key); + void advanceMap(); + void exitMap(); + + void enterSequence(bool IsFlow); + void preEmitSequence(); + void advanceSequence(); + void exitSequence(); + + void enterBitset(); + void emitBitset(StringRef Str); + void exitBitset(); + + void outputScalar(StringRef Str); + void outputScalar(StringRef Str, QuotingType Quotes); + void outputBlockScalar(StringRef Str); + void outputTag(StringRef Tag); + void outputScalarTag(StringRef Tag); + bool canElideEmptySequence() const; + +private: + enum InState : unsigned char { + inSeqFirstElement, + inFlowSeqFirstElement, + inSeqOtherElement, + inFlowSeqOtherElement, + + inMapFirstKey, + inFlowMapFirstKey, + inMapOtherKey, + inFlowMapOtherKey, + + inBitsetFirst, + inBitsetOther + + }; + + class State { + unsigned Type : llvm::detail::ConstantLog2::value + 1; + unsigned Column : 31 - llvm::detail::ConstantLog2::value; + + public: + State(InState State) : Type(State) { + // As we flow after we create the state, we have to set column afterwards. 
+ } + InState getState() const { return static_cast(Type); } + void setState(InState NewState) { Type = NewState; } + unsigned getColumn() const { return Column; } + void setColumn(unsigned NewColumn) { Column = NewColumn; } + }; + + static constexpr bool inSeqAnyElement(InState State) { + return State == inSeqFirstElement || State == inSeqOtherElement; + } + + static constexpr bool inFlowSeqAnyElement(InState State) { + return State == inFlowSeqFirstElement || State == inFlowSeqOtherElement; + } + + static constexpr bool inMapAnyKey(InState State) { + return State == inMapFirstKey || State == inMapOtherKey; + } + + static constexpr bool inFlowMapAnyKey(InState State) { + return State == inFlowMapFirstKey || State == inFlowMapOtherKey; + } + + static constexpr bool inAnySequence(InState State) { + return State <= inFlowSeqOtherElement; + } + + static constexpr bool inAnyMap(InState State) { + return State >= inMapFirstKey && State <= inFlowMapOtherKey; + } + + static constexpr bool inBitset(InState State) { + return State >= inBitsetFirst; + } + + void output(StringRef S); + void outputUpToEndOfLine(StringRef S); + void newLineCheck(bool EmptySequence = false); + void outputNewLine(); + void wrapFlow(); + + raw_ostream &Out; + SmallVector StateStack; + SmallVector ContainerPaddingStack; + int WrapColumn; + int Column = 0; + StringRef Padding; + bool InDocument = false; + bool IsDocEmpty = true; + bool CanEmitValues = true; +}; +} // namespace yaml +} // namespace llvm + +#endif // LLVM_SUPPORT_YAMLSERIALIZER_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -226,6 +226,7 @@ WithColor.cpp X86TargetParser.cpp YAMLParser.cpp + YAMLSerializer.cpp YAMLTraits.cpp raw_os_ostream.cpp raw_ostream.cpp diff --git a/llvm/lib/Support/YAMLSerializer.cpp b/llvm/lib/Support/YAMLSerializer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Support/YAMLSerializer.cpp @@ 
-0,0 +1,453 @@ +//===- YAMLSerializer.cpp - Simple YAML Serializer ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a YAML serializer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YamlSerializer.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" + +#if NDEBUG +#define DEBUGONLY(X) \ + do { \ + } while (false) +#else +#define DEBUGONLY(X) \ + do { \ + X; \ + } while (false) +#endif + +using namespace llvm; +using namespace yaml; + +QuotingType llvm::yaml::needsQuotes(StringRef S) { + if (S.empty()) + return QuotingType::Single; + + QuotingType MaxQuotingNeeded = QuotingType::None; + if (isSpace(static_cast(S.front())) || + isSpace(static_cast(S.back()))) + MaxQuotingNeeded = QuotingType::Single; + if (isNull(S)) + MaxQuotingNeeded = QuotingType::Single; + if (isBool(S)) + MaxQuotingNeeded = QuotingType::Single; + if (isNumeric(S)) + MaxQuotingNeeded = QuotingType::Single; + + // 7.3.3 Plain Style + // Plain scalars must not begin with most indicators, as this would cause + // ambiguity with other YAML constructs. + if (std::strchr(R"(-?:\,[]{}#&*!|>'"%@`)", S[0]) != nullptr) + MaxQuotingNeeded = QuotingType::Single; + + for (unsigned char C : S) { + // Alphanum is safe. + if (isAlnum(C)) + continue; + + switch (C) { + // Safe scalar characters. + case '_': + case '-': + case '^': + case '.': + case ',': + case ' ': + // TAB (0x9) is allowed in unquoted strings. + case 0x9: + continue; + // LF(0xA) and CR(0xD) may delimit values and so require at least single + // quotes. 
LLVM YAML parser cannot handle single quoted multiline so use + // double quoting to produce valid YAML. + case 0xA: + case 0xD: + return QuotingType::Double; + // DEL (0x7F) is excluded from the allowed character range. + case 0x7F: + return QuotingType::Double; + // Forward slash is allowed to be unquoted, but we quote it anyway. We have + // many tests that use FileCheck against YAML output, and this output often + // contains paths. If we quote backslashes but not forward slashes then + // paths will come out either quoted or unquoted depending on which platform + // the test is run on, making FileCheck comparisons difficult. + case '/': + default: { + // C0 control block (0x0 - 0x1F) is excluded from the allowed character + // range. + if (C <= 0x1F) + return QuotingType::Double; + + // Always double quote UTF-8. + if ((C & 0x80) != 0) + return QuotingType::Double; + + // The character is not safe, at least single quoting is needed. + MaxQuotingNeeded = QuotingType::Single; + } + } + } + + return MaxQuotingNeeded; +} + +bool llvm::yaml::OutputStream::canElideEmptySequence() const { + // Normally, with an optional key/value where the value is an empty sequence, + // the whole key/value can be left unwritten. But that produces invalid YAML + // if the key/value is the only thing in the map and the map is used in + // a sequence. This detects if this sequence is the first key/value + // in a map that is itself embedded in a sequence.
+ if (StateStack.size() < 2) + return true; + if (StateStack.back().getState() != inMapFirstKey) + return true; + return !inSeqAnyElement(StateStack[StateStack.size() - 2].getState()); +} + +void OutputStream::output(StringRef s) { + Column += s.size(); + Out << s; + DEBUGONLY(IsDocEmpty = false); +} + +void OutputStream::outputUpToEndOfLine(StringRef s) { + output(s); + if (StateStack.empty() || + (!inFlowSeqAnyElement(StateStack.back().getState()) && + !inFlowMapAnyKey(StateStack.back().getState()) && + !inBitset(StateStack.back().getState()))) + Padding = "\n"; +} + +void OutputStream::outputNewLine() { + Out << "\n"; + Column = 0; +} + +// if seq at top, indent as if map, then add "- " +// if seq in middle, use "- " if firstKey, else use " " +// +void OutputStream::newLineCheck(bool EmptySequence) { + if (Padding != "\n") { + output(Padding); + Padding = {}; + return; + } + outputNewLine(); + Padding = {}; + + if (StateStack.size() == 0 || EmptySequence) + return; + + unsigned Indent = StateStack.size() - 1; + bool OutputDash = false; + + if (inSeqAnyElement(StateStack.back().getState())) { + OutputDash = true; + } else if ((StateStack.size() > 1) && + ((StateStack.back().getState() == inMapFirstKey) || + inFlowSeqAnyElement(StateStack.back().getState()) || + (StateStack.back().getState() == inFlowMapFirstKey)) && + inSeqAnyElement(StateStack[StateStack.size() - 2].getState())) { + --Indent; + OutputDash = true; + } + + for (unsigned i = 0; i < Indent; ++i) { + output(" "); + } + if (OutputDash) { + output("- "); + } +} + +void OutputStream::wrapFlow() { + if (WrapColumn && Column > WrapColumn) { + output("\n"); + for (unsigned i = 0; i < StateStack.back().getColumn(); ++i) + output(" "); + Column = StateStack.back().getColumn(); + output(" "); + } +} + +void OutputStream::outputScalar(StringRef Str) { + outputScalar(Str, needsQuotes(Str)); +} + +void OutputStream::outputScalar(StringRef S, QuotingType MustQuote) { + assert(CanEmitValues); + 
DEBUGONLY(CanEmitValues = false); + newLineCheck(); + if (S.empty()) { + // Print '' for the empty string because leaving the field empty is not + // allowed. + outputUpToEndOfLine("''"); + return; + } + if (MustQuote == QuotingType::None) { + // Only quote if we must. + outputUpToEndOfLine(S); + return; + } + + const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; + output(Quote); // Starting quote. + + // When using double-quoted strings (and only in that case), non-printable + // characters may be present, and will be escaped using a variety of + // unicode-scalar and special short-form escapes. This is handled in + // yaml::escape. + if (MustQuote == QuotingType::Double) { + output(yaml::escape(S, /* EscapePrintable= */ false)); + outputUpToEndOfLine(Quote); + return; + } + + unsigned i = 0; + unsigned j = 0; + unsigned End = S.size(); + const char *Base = S.data(); + + // When using single-quoted strings, any single quote ' must be doubled to be + // escaped. + while (j < End) { + if (S[j] == '\'') { // Escape quotes. + output(StringRef(&Base[i], j - i)); // "flush". + output(StringLiteral("''")); // Print it as '' + i = j + 1; + } + ++j; + } + output(StringRef(&Base[i], j - i)); + outputUpToEndOfLine(Quote); // Ending quote. +} + +void OutputStream::outputBlockScalar(StringRef S) { + assert(CanEmitValues); + if (!StateStack.empty()) + newLineCheck(); + output(" |"); + outputNewLine(); + + unsigned Indent = StateStack.empty() ? 
1 : StateStack.size(); + + auto Buffer = MemoryBuffer::getMemBuffer(S, "", false); + for (line_iterator Lines(*Buffer, false); !Lines.is_at_end(); ++Lines) { + for (unsigned I = 0; I < Indent; ++I) { + output(" "); + } + output(*Lines); + outputNewLine(); + } + DEBUGONLY(CanEmitValues = false); +} + +void OutputStream::outputScalarTag(StringRef Tag) { + if (Tag.empty()) + return; + newLineCheck(); + output(Tag); + output(" "); +} + +void OutputStream::outputTag(StringRef Tag) { + // If this tag is being written inside a sequence we should write the start + // of the sequence before writing the tag, otherwise the tag won't be + // attached to the element in the sequence, but rather the sequence itself. + bool OuterSequenceElement = + StateStack.size() > 1 + ? inAnySequence(StateStack[StateStack.size() - 2].getState()) + : false; + if (OuterSequenceElement && StateStack.back().getState() == inMapFirstKey) { + newLineCheck(); + } else { + output(" "); + } + output(Tag); + if (OuterSequenceElement) { + // If we're writing the tag during the first element of a map, the tag + // takes the place of the first element in the sequence. + if (StateStack.back().getState() == inMapFirstKey) + StateStack.back().setState(inMapOtherKey); + // Tags inside maps in sequences should act as keys in the map from a + // formatting perspective, so we always want a newline in a sequence. 
+ Padding = "\n"; + } +} + +void llvm::yaml::OutputStream::newDocument(bool First) { + assert(StateStack.empty() && + "Cannot create new document while state is non-empty"); + assert(IsDocEmpty == CanEmitValues); + if (First) + outputUpToEndOfLine("---"); + else + outputUpToEndOfLine("\n---"); + InDocument = true; + DEBUGONLY(IsDocEmpty = false); + DEBUGONLY(CanEmitValues = true); +} + +void llvm::yaml::OutputStream::endDocuments() { + assert(IsDocEmpty == CanEmitValues); + outputUpToEndOfLine("\n...\n"); + InDocument = false; +} + +void llvm::yaml::OutputStream::enterMap(bool IsFlow) { + assert(CanEmitValues); + if (IsFlow) { + StateStack.push_back(inFlowMapFirstKey); + newLineCheck(); + StateStack.back().setColumn(Column); + output("{ "); + } else { + StateStack.push_back(inMapFirstKey); + ContainerPaddingStack.push_back(Padding); + Padding = "\n"; + } + DEBUGONLY(CanEmitValues = false); +} + +void llvm::yaml::OutputStream::emitMapKey(StringRef Key) { + assert(!CanEmitValues); + if (inMapAnyKey(StateStack.back().getState())) { + newLineCheck(); + output(Key); + output(":"); + const char *Spaces = " "; + if (Key.size() < strlen(Spaces)) + Padding = &Spaces[Key.size()]; + else + Padding = " "; + } else { + assert(inFlowMapAnyKey(StateStack.back().getState())); + if (StateStack.back().getState() == inFlowMapOtherKey) + output(", "); + wrapFlow(); + output(Key); + output(": "); + } + DEBUGONLY(CanEmitValues = true); +} + +void llvm::yaml::OutputStream::advanceMap() { + assert(inAnyMap(StateStack.back().getState())); + assert(!CanEmitValues && "Waiting for value to be emitted"); + StateStack.back().setState( + static_cast(StateStack.back().getState() | 2U)); + DEBUGONLY(CanEmitValues = false); +} + +void llvm::yaml::OutputStream::exitMap() { + assert(!CanEmitValues && "Waiting for value to be emitted"); + assert(inAnyMap(StateStack.back().getState())); + if (inFlowMapAnyKey(StateStack.back().getState())) { + StateStack.pop_back(); + outputUpToEndOfLine(" }"); + } else 
{ + assert(inMapAnyKey(StateStack.back().getState())); + // If we did not map anything, we should explicitly emit an empty map + if (StateStack.back().getState() == inMapFirstKey) { + Padding = ContainerPaddingStack.pop_back_val(); + newLineCheck(); + output("{}"); + Padding = "\n"; + } + StateStack.pop_back(); + } +} + +void llvm::yaml::OutputStream::enterSequence(bool IsFlow) { + assert(CanEmitValues); + if (IsFlow) { + StateStack.push_back(inFlowSeqFirstElement); + newLineCheck(); + StateStack.back().setColumn(Column); + output("[ "); + } else { + StateStack.emplace_back(inSeqFirstElement); + ContainerPaddingStack.push_back(Padding); + Padding = "\n"; + } + // Prevent emitting values until preEmitSequence is called. + DEBUGONLY(CanEmitValues = false); +} + +void llvm::yaml::OutputStream::preEmitSequence() { + assert(inAnySequence(StateStack.back().getState())); + assert(!CanEmitValues); + if (inFlowSeqAnyElement(StateStack.back().getState())) { + if (StateStack.back().getState() == inFlowSeqOtherElement) + output(", "); + wrapFlow(); + } + DEBUGONLY(CanEmitValues = true); +} + +void llvm::yaml::OutputStream::advanceSequence() { + assert(inAnySequence(StateStack.back().getState())); + assert(!CanEmitValues && "Waiting for sequence value to be emitted"); + StateStack.back().setState( + static_cast(StateStack.back().getState() | 2U)); +} + +void llvm::yaml::OutputStream::exitSequence() { + assert(!CanEmitValues && "Waiting for sequence value to be emitted"); + if (inFlowSeqAnyElement(StateStack.back().getState())) { + StateStack.pop_back(); + outputUpToEndOfLine(" ]"); + } else { + assert(inSeqAnyElement(StateStack.back().getState())); + // If we did not emit anything, we should explicitly emit an empty + // sequence + if (StateStack.back().getState() == inSeqFirstElement) { + Padding = ContainerPaddingStack.pop_back_val(); + newLineCheck(/*EmptySequence=*/true); + output("[]"); + Padding = "\n"; + } + StateStack.pop_back(); + } + DEBUGONLY(CanEmitValues = false); 
+} + +void llvm::yaml::OutputStream::enterBitset() { + assert(CanEmitValues); + newLineCheck(); + StateStack.emplace_back(inBitsetFirst).setColumn(Column); + output("[ "); + // Not technically correct, but all bitset output should go through emitBitset, + // so we can ignore that logic in there. + DEBUGONLY(CanEmitValues = false); +} + +void llvm::yaml::OutputStream::emitBitset(StringRef Str) { + assert(!CanEmitValues); + if (StateStack.back().getState() == inBitsetOther) + output(", "); + else { + assert(StateStack.back().getState() == inBitsetFirst); + StateStack.back().setState(inBitsetOther); + } + wrapFlow(); + output(Str); +} + +void llvm::yaml::OutputStream::exitBitset() { + assert(!CanEmitValues); + assert(inBitset(StateStack.back().getState())); + StateStack.pop_back(); + outputUpToEndOfLine(" ]"); +} diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -465,7 +465,7 @@ //===----------------------------------------------------------------------===// Output::Output(raw_ostream &yout, void *context, int WrapColumn) - : IO(context), Out(yout), WrapColumn(WrapColumn) {} + : IO(context), Out(yout, WrapColumn) {} Output::~Output() = default; @@ -473,53 +473,15 @@ return true; } -void Output::beginMapping() { - StateStack.push_back(inMapFirstKey); - PaddingBeforeContainer = Padding; - Padding = "\n"; -} +void Output::beginMapping() { Out.enterMap(false); } bool Output::mapTag(StringRef Tag, bool Use) { - if (Use) { - // If this tag is being written inside a sequence we should write the start - // of the sequence before writing the tag, otherwise the tag won't be - // attached to the element in the sequence, but rather the sequence itself.
- bool SequenceElement = false; - if (StateStack.size() > 1) { - auto &E = StateStack[StateStack.size() - 2]; - SequenceElement = inSeqAnyElement(E) || inFlowSeqAnyElement(E); - } - if (SequenceElement && StateStack.back() == inMapFirstKey) { - newLineCheck(); - } else { - output(" "); - } - output(Tag); - if (SequenceElement) { - // If we're writing the tag during the first element of a map, the tag - // takes the place of the first element in the sequence. - if (StateStack.back() == inMapFirstKey) { - StateStack.pop_back(); - StateStack.push_back(inMapOtherKey); - } - // Tags inside maps in sequences should act as keys in the map from a - // formatting perspective, so we always want a newline in a sequence. - Padding = "\n"; - } - } + if (Use) + Out.outputTag(Tag); return Use; } -void Output::endMapping() { - // If we did not map anything, we should explicitly emit an empty map - if (StateStack.back() == inMapFirstKey) { - Padding = PaddingBeforeContainer; - newLineCheck(); - output("{}"); - Padding = "\n"; - } - StateStack.pop_back(); -} +void Output::endMapping() { Out.exitMap(); } std::vector Output::keys() { report_fatal_error("invalid call"); @@ -530,121 +492,59 @@ UseDefault = false; SaveInfo = nullptr; if (Required || !SameAsDefault || WriteDefaultValues) { - auto State = StateStack.back(); - if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) { - flowKey(Key); - } else { - newLineCheck(); - paddedKey(Key); - } + Out.emitMapKey(Key); return true; } return false; } -void Output::postflightKey(void *) { - if (StateStack.back() == inMapFirstKey) { - StateStack.pop_back(); - StateStack.push_back(inMapOtherKey); - } else if (StateStack.back() == inFlowMapFirstKey) { - StateStack.pop_back(); - StateStack.push_back(inFlowMapOtherKey); - } -} +void Output::postflightKey(void *) { Out.advanceMap(); } -void Output::beginFlowMapping() { - StateStack.push_back(inFlowMapFirstKey); - newLineCheck(); - ColumnAtMapFlowStart = Column; - output("{ "); -} +void 
Output::beginFlowMapping() { Out.enterMap(true); } -void Output::endFlowMapping() { - StateStack.pop_back(); - outputUpToEndOfLine(" }"); -} +void Output::endFlowMapping() { Out.exitMap(); } -void Output::beginDocuments() { - outputUpToEndOfLine("---"); -} +void Output::beginDocuments() {} bool Output::preflightDocument(unsigned index) { - if (index > 0) - outputUpToEndOfLine("\n---"); + Out.newDocument(index == 0); return true; } void Output::postflightDocument() { } -void Output::endDocuments() { - output("\n...\n"); -} +void Output::endDocuments() { Out.endDocuments(); } unsigned Output::beginSequence() { - StateStack.push_back(inSeqFirstElement); - PaddingBeforeContainer = Padding; - Padding = "\n"; + Out.enterSequence(false); return 0; } -void Output::endSequence() { - // If we did not emit anything, we should explicitly emit an empty sequence - if (StateStack.back() == inSeqFirstElement) { - Padding = PaddingBeforeContainer; - newLineCheck(/*EmptySequence=*/true); - output("[]"); - Padding = "\n"; - } - StateStack.pop_back(); -} +void Output::endSequence() { Out.exitSequence(); } bool Output::preflightElement(unsigned, void *&SaveInfo) { + Out.preEmitSequence(); SaveInfo = nullptr; return true; } -void Output::postflightElement(void *) { - if (StateStack.back() == inSeqFirstElement) { - StateStack.pop_back(); - StateStack.push_back(inSeqOtherElement); - } else if (StateStack.back() == inFlowSeqFirstElement) { - StateStack.pop_back(); - StateStack.push_back(inFlowSeqOtherElement); - } -} +void Output::postflightElement(void *) { Out.advanceSequence(); } unsigned Output::beginFlowSequence() { - StateStack.push_back(inFlowSeqFirstElement); - newLineCheck(); - ColumnAtFlowStart = Column; - output("[ "); - NeedFlowSequenceComma = false; + Out.enterSequence(true); return 0; } -void Output::endFlowSequence() { - StateStack.pop_back(); - outputUpToEndOfLine(" ]"); -} +void Output::endFlowSequence() { Out.exitSequence(); } bool Output::preflightFlowElement(unsigned, 
void *&SaveInfo) { - if (NeedFlowSequenceComma) - output(", "); - if (WrapColumn && Column > WrapColumn) { - output("\n"); - for (int i = 0; i < ColumnAtFlowStart; ++i) - output(" "); - Column = ColumnAtFlowStart; - output(" "); - } + Out.preEmitSequence(); SaveInfo = nullptr; return true; } -void Output::postflightFlowElement(void *) { - NeedFlowSequenceComma = true; -} +void Output::postflightFlowElement(void *) { Out.advanceSequence(); } void Output::beginEnumScalar() { EnumerationMatchFound = false; @@ -652,8 +552,7 @@ bool Output::matchEnumScalar(const char *Str, bool Match) { if (Match && !EnumerationMatchFound) { - newLineCheck(); - outputUpToEndOfLine(Str); + Out.outputScalar(Str, QuotingType::None); EnumerationMatchFound = true; } return false; @@ -672,211 +571,35 @@ } bool Output::beginBitSetScalar(bool &DoClear) { - newLineCheck(); - output("[ "); - NeedBitValueComma = false; + Out.enterBitset(); DoClear = false; return true; } bool Output::bitSetMatch(const char *Str, bool Matches) { if (Matches) { - if (NeedBitValueComma) - output(", "); - output(Str); - NeedBitValueComma = true; + Out.emitBitset(Str); } return false; } -void Output::endBitSetScalar() { - outputUpToEndOfLine(" ]"); -} +void Output::endBitSetScalar() { Out.exitBitset(); } void Output::scalarString(StringRef &S, QuotingType MustQuote) { - newLineCheck(); - if (S.empty()) { - // Print '' for the empty string because leaving the field empty is not - // allowed. - outputUpToEndOfLine("''"); - return; - } - if (MustQuote == QuotingType::None) { - // Only quote if we must. - outputUpToEndOfLine(S); - return; - } - - const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; - output(Quote); // Starting quote. - - // When using double-quoted strings (and only in that case), non-printable characters may be - // present, and will be escaped using a variety of unicode-scalar and special short-form - // escapes. This is handled in yaml::escape. 
- if (MustQuote == QuotingType::Double) { - output(yaml::escape(S, /* EscapePrintable= */ false)); - outputUpToEndOfLine(Quote); - return; - } - - unsigned i = 0; - unsigned j = 0; - unsigned End = S.size(); - const char *Base = S.data(); - - // When using single-quoted strings, any single quote ' must be doubled to be escaped. - while (j < End) { - if (S[j] == '\'') { // Escape quotes. - output(StringRef(&Base[i], j - i)); // "flush". - output(StringLiteral("''")); // Print it as '' - i = j + 1; - } - ++j; - } - output(StringRef(&Base[i], j - i)); - outputUpToEndOfLine(Quote); // Ending quote. + Out.outputScalar(S, MustQuote); } -void Output::blockScalarString(StringRef &S) { - if (!StateStack.empty()) - newLineCheck(); - output(" |"); - outputNewLine(); - - unsigned Indent = StateStack.empty() ? 1 : StateStack.size(); - - auto Buffer = MemoryBuffer::getMemBuffer(S, "", false); - for (line_iterator Lines(*Buffer, false); !Lines.is_at_end(); ++Lines) { - for (unsigned I = 0; I < Indent; ++I) { - output(" "); - } - output(*Lines); - outputNewLine(); - } -} +void Output::blockScalarString(StringRef &S) { Out.outputBlockScalar(S); } -void Output::scalarTag(std::string &Tag) { - if (Tag.empty()) - return; - newLineCheck(); - output(Tag); - output(" "); -} +void Output::scalarTag(std::string &Tag) { Out.outputScalarTag(Tag); } void Output::setError(const Twine &message) { } -bool Output::canElideEmptySequence() { - // Normally, with an optional key/value where the value is an empty sequence, - // the whole key/value can be not written. But, that produces wrong yaml - // if the key/value is the only thing in the map and the map is used in - // a sequence. This detects if the this sequence is the first key/value - // in map that itself is embedded in a sequence. 
- if (StateStack.size() < 2) - return true; - if (StateStack.back() != inMapFirstKey) - return true; - return !inSeqAnyElement(StateStack[StateStack.size() - 2]); -} - -void Output::output(StringRef s) { - Column += s.size(); - Out << s; -} - -void Output::outputUpToEndOfLine(StringRef s) { - output(s); - if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) && - !inFlowMapAnyKey(StateStack.back()))) - Padding = "\n"; -} - -void Output::outputNewLine() { - Out << "\n"; - Column = 0; -} - -// if seq at top, indent as if map, then add "- " -// if seq in middle, use "- " if firstKey, else use " " -// - -void Output::newLineCheck(bool EmptySequence) { - if (Padding != "\n") { - output(Padding); - Padding = {}; - return; - } - outputNewLine(); - Padding = {}; - - if (StateStack.size() == 0 || EmptySequence) - return; - - unsigned Indent = StateStack.size() - 1; - bool OutputDash = false; - - if (StateStack.back() == inSeqFirstElement || - StateStack.back() == inSeqOtherElement) { - OutputDash = true; - } else if ((StateStack.size() > 1) && - ((StateStack.back() == inMapFirstKey) || - inFlowSeqAnyElement(StateStack.back()) || - (StateStack.back() == inFlowMapFirstKey)) && - inSeqAnyElement(StateStack[StateStack.size() - 2])) { - --Indent; - OutputDash = true; - } - - for (unsigned i = 0; i < Indent; ++i) { - output(" "); - } - if (OutputDash) { - output("- "); - } -} - -void Output::paddedKey(StringRef key) { - output(key); - output(":"); - const char *spaces = " "; - if (key.size() < strlen(spaces)) - Padding = &spaces[key.size()]; - else - Padding = " "; -} - -void Output::flowKey(StringRef Key) { - if (StateStack.back() == inFlowMapOtherKey) - output(", "); - if (WrapColumn && Column > WrapColumn) { - output("\n"); - for (int I = 0; I < ColumnAtMapFlowStart; ++I) - output(" "); - Column = ColumnAtMapFlowStart; - output(" "); - } - output(Key); - output(": "); -} +bool Output::canElideEmptySequence() { return Out.canElideEmptySequence(); } NodeKind 
Output::getNodeKind() { report_fatal_error("invalid call"); } -bool Output::inSeqAnyElement(InState State) { - return State == inSeqFirstElement || State == inSeqOtherElement; -} - -bool Output::inFlowSeqAnyElement(InState State) { - return State == inFlowSeqFirstElement || State == inFlowSeqOtherElement; -} - -bool Output::inMapAnyKey(InState State) { - return State == inMapFirstKey || State == inMapOtherKey; -} - -bool Output::inFlowMapAnyKey(InState State) { - return State == inFlowMapFirstKey || State == inFlowMapOtherKey; -} - //===----------------------------------------------------------------------===// // traits for built-in types //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -97,6 +97,7 @@ WithColorTest.cpp YAMLIOTest.cpp YAMLParserTest.cpp + YAMLSerializerTest.cpp buffer_ostream_test.cpp formatted_raw_ostream_test.cpp raw_fd_stream_test.cpp diff --git a/llvm/unittests/Support/YAMLSerializerTest.cpp b/llvm/unittests/Support/YAMLSerializerTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Support/YAMLSerializerTest.cpp @@ -0,0 +1,94 @@ +//===- unittest/Support/YAMLSerializerTest --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YamlSerializer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include + +namespace llvm { +using yaml::OutputStream; + +static void eatDiagnostic(const SMDiagnostic &, void *) {} + +static bool isParseable(StringRef Input) { + SourceMgr Mgr; + Mgr.setDiagHandler(eatDiagnostic); + yaml::Stream InStream(Input, Mgr); + InStream.skip(); + return !InStream.failed(); +} + +TEST(YAMLSerializer, Builders) { + std::string Output; + llvm::raw_string_ostream Stream(Output); + { + yaml::OutputStream Yaml(Stream); + Yaml.newDocument(/*First=*/true); + auto BuildMap = Yaml.buildMap(/*Flow=*/false); + BuildMap.emit("ScalarTest", "Value"); + BuildMap.emit("FlowMap", [](OutputStream &O) { + O.buildMap(/*Flow=*/true) + .emit("X", "5", yaml::QuotingType::None) + .emit("Y", "4", yaml::QuotingType::Single) + .emit("Z", "3", yaml::QuotingType::Double); + }); + BuildMap.emit("MapTest", [](OutputStream &O) { + O.buildMap(/*Flow=*/false) + .emit("FlowFirst", + [](OutputStream &O) { + O.buildSequence(/*Flow=*/true) + .emit("Item1") + .emit("Item2") + .emit("Item3"); + }) + .emit("NonFlow", + [](OutputStream &O) { + O.buildSequence(/*Flow=*/false) + .emit("ItemA") + .emit("ItemB") + .emit("ItemC"); + }) + .emit("Bitset", [](OutputStream &O) { + O.buildBitset().emit("Bits1").emit("Bits2").emit("Bits3"); + }); + }); + } + Stream.flush(); + EXPECT_EQ(Output, R"(--- +ScalarTest: Value +FlowMap: { X: 5, Y: '4', Z: "3" } +MapTest: + FlowFirst: [ Item1, Item2, Item3 ] + NonFlow: + - ItemA + - ItemB + - ItemC + Bitset: [ Bits1, Bits2, Bits3 ] +... 
+)"); + EXPECT_TRUE(isParseable(Output)); +} + +TEST(YAMLSerializer, WriteVector) { + std::vector Values = {1, 2, 3, 4, 5}; + std::string Output; + llvm::raw_string_ostream Stream(Output); + { + yaml::OutputStream Yaml(Stream); + auto Seq = Yaml.buildSequence(true); + for (auto Item : Values) + Seq.emit(std::to_string(Item), yaml::QuotingType::None); + } + Stream.flush(); + EXPECT_EQ(Output, R"([ 1, 2, 3, 4, 5 ])"); + EXPECT_TRUE(isParseable(Output)); +} +} // namespace llvm