diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -11,7 +11,6 @@ // See http://www.yaml.org/spec/1.2/spec.html for the full standard. // // This currently does not implement the following: -// * Multi-line literal folding. // * Tag resolution. // * UTF-16. // * BOMs anywhere other than the first Unicode scalar value in the file. diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -392,6 +392,9 @@ /// Pos is whitespace or a new line bool isBlankOrBreak(StringRef::iterator Position); + /// Return true if Line is empty or holds only blanks and line breaks. + bool isLineEmpty(StringRef Line); + /// Consume a single b-break[28] if it's present at the current position. /// /// Return false if the code unit at the current position isn't a line break. @@ -470,6 +473,18 @@ /// Scan a block scalar starting with | or >. bool scanBlockScalar(bool IsLiteral); + /// Scan a block scalar style indicator and header. + /// + /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that + /// YAML does not consider the style indicator to be a part of the header. + /// + /// Return false if an error occurred. + bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator, + unsigned &IndentIndicator, bool &IsDone); + + /// Scan the '|' or '>' style indicator preceding a block scalar header. + char scanBlockStyleIndicator(); + /// Scan a chomping indicator in a block scalar header. 
char scanBlockChompingIndicator(); @@ -1034,6 +1049,13 @@ *Position == '\n'; } +bool Scanner::isLineEmpty(StringRef Line) { + for (const auto *Position = Line.begin(); Position != Line.end(); ++Position) + if (!isBlankOrBreak(Position)) + return false; + return true; +} + bool Scanner::consumeLineBreakIfPresent() { auto Next = skip_b_break(Current); if (Next == Current) @@ -1516,6 +1538,25 @@ return true; } +bool Scanner::scanBlockScalarIndicators(char &StyleIndicator, + char &ChompingIndicator, + unsigned &IndentIndicator, + bool &IsDone) { + StyleIndicator = scanBlockStyleIndicator(); + if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone)) + return false; + return true; +} + +char Scanner::scanBlockStyleIndicator() { + char Indicator = ' '; + if (Current != End && (*Current == '>' || *Current == '|')) { + Indicator = *Current; + skip(1); + } + return Indicator; +} + char Scanner::scanBlockChompingIndicator() { char Indicator = ' '; if (Current != End && (*Current == '+' || *Current == '-')) { @@ -1654,19 +1695,19 @@ } bool Scanner::scanBlockScalar(bool IsLiteral) { - // Eat '|' or '>' assert(*Current == '|' || *Current == '>'); - skip(1); - + char StyleIndicator; char ChompingIndicator; unsigned BlockIndent; bool IsDone = false; - if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) + if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent, + IsDone)) return false; if (IsDone) return true; + bool IsFolded = StyleIndicator == '>'; - auto Start = Current; + const auto *Start = Current; unsigned BlockExitIndent = Indent < 0 ? 
0 : (unsigned)Indent; unsigned LineBreaks = 0; if (BlockIndent == 0) { @@ -1687,6 +1728,22 @@ auto LineStart = Current; advanceWhile(&Scanner::skip_nb_char); if (LineStart != Current) { + if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) { + // The folded style "folds" any single line break between content into a + // single space, except when that content is "empty" (only contains + // whitespace) in which case the line break is left as-is. + if (LineBreaks == 1) { + Str.append(LineBreaks, + isLineEmpty(StringRef(LineStart, Current - LineStart)) + ? '\n' + : ' '); + } + // If we saw a single line break, we are completely replacing it and so + // want `LineBreaks == 0`. Otherwise this decrement accounts for the + // fact that the first line break is "trimmed", only being used to + // signal a sequence of line breaks which should not be folded. + LineBreaks--; + } Str.append(LineBreaks, '\n'); Str.append(StringRef(LineStart, Current - LineStart)); LineBreaks = 0; diff --git a/llvm/test/YAMLParser/spec-09-24.test b/llvm/test/YAMLParser/spec-09-24.test --- a/llvm/test/YAMLParser/spec-09-24.test +++ b/llvm/test/YAMLParser/spec-09-24.test @@ -1,13 +1,103 @@ # RUN: yaml-bench -canonical %s | FileCheck %s -# CHECK: ? !!str "strip" -# CHECK: : !!str "" -# CHECK: ? !!str "clip" -# CHECK: : !!str "" -# CHECK: ? !!str "keep" -# CHECK: : !!str "\n" +# CHECK: ? !!str "literal_strip" +# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar" +# CHECK: ? !!str "literal_clip" +# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar\n" +# CHECK: ? !!str "literal_keep" +# CHECK: : !!str "Hello\n\n\nworld\non\nmultiple \n\n\nlines\n\nfoo bar\n\n\n\n" +# CHECK: ? !!str "folded_strip" +# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar" +# CHECK: ? !!str "folded_clip" +# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar\n" +# CHECK: ? 
!!str "folded_keep" +# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nfoo bar\n\n\n" -strip: >- +literal_strip: |- + Hello -clip: > -keep: |+ + world + on + multiple + + + lines + + foo bar + + + +literal_clip: | + Hello + + + world + on + multiple + + + lines + + foo bar + + + +literal_keep: |+ + Hello + + + world + on + multiple + + + lines + + foo bar + + + +folded_strip: >- + Hello + + + world + on + multiple + + + lines + + foo bar + + + +folded_clip: > + Hello + + + world + on + multiple + + + lines + + foo bar + + + +folded_keep: >+ + Hello + + + world + on + multiple + + + lines + + foo bar + +