diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -11,7 +11,6 @@ // See http://www.yaml.org/spec/1.2/spec.html for the full standard. // // This currently does not implement the following: -// * Multi-line literal folding. // * Tag resolution. // * UTF-16. // * BOMs anywhere other than the first Unicode scalar value in the file. diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -393,6 +393,9 @@ /// Pos is whitespace or a new line bool isBlankOrBreak(StringRef::iterator Position); + /// Return true if `Line` is exactly "\n", "\r" or "\t\n", false otherwise. + bool isLineBreak(StringRef Line); + /// Consume a single b-break[28] if it's present at the current position. /// /// Return false if the code unit at the current position isn't a line break. @@ -471,6 +474,13 @@ /// Scan a block scalar starting with | or >. bool scanBlockScalar(bool IsLiteral); + /// Check whether a block scalar is folded. If it is, `NewlineOrSpace` is + /// set to a space ` `; otherwise it is left unchanged. + /// + /// Return true if the character at `Position` is `>`. + bool isFoldedBlock(StringRef::iterator Position, char &NewlineOrSpace, + char ChompingIndicator); + /// Scan a chomping indicator in a block scalar header.
char scanBlockChompingIndicator();
@@ -1035,6 +1045,10 @@
          *Position == '\n';
 }
 
+bool Scanner::isLineBreak(StringRef Line) {
+  return Line == "\n" || Line == "\r" || Line == "\t\n";
+}
+
 bool Scanner::consumeLineBreakIfPresent() {
   auto Next = skip_b_break(Current);
   if (Next == Current)
@@ -1517,6 +1531,16 @@
   return true;
 }
 
+bool Scanner::isFoldedBlock(StringRef::iterator Position, char &NewlineOrSpace,
+                            char ChompingIndicator) {
+  bool IsFolded = false;
+  if (*Position == '>') {
+    IsFolded = true;
+    NewlineOrSpace = ' ';
+  }
+  return IsFolded;
+}
+
 char Scanner::scanBlockChompingIndicator() {
   char Indicator = ' ';
   if (Current != End && (*Current == '+' || *Current == '-')) {
@@ -1657,8 +1681,10 @@
 bool Scanner::scanBlockScalar(bool IsLiteral) {
   // Eat '|' or '>'
   assert(*Current == '|' || *Current == '>');
+  const auto *Start = Current;
   skip(1);
 
+  char NewlineOrSpace = '\n';
   char ChompingIndicator;
   unsigned BlockIndent;
   bool IsDone = false;
@@ -1666,8 +1692,9 @@
     return false;
   if (IsDone)
     return true;
+  bool IsFolded = isFoldedBlock(Start, NewlineOrSpace, ChompingIndicator);
 
-  auto Start = Current;
+  Start = Current;
   unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
   unsigned LineBreaks = 0;
   if (BlockIndent == 0) {
@@ -1688,11 +1715,26 @@
     auto LineStart = Current;
     advanceWhile(&Scanner::skip_nb_char);
     if (LineStart != Current) {
-      Str.append(LineBreaks, '\n');
+      // Leading line breaks (nothing appended yet) are kept verbatim in both
+      // styles; checking Str.empty() first also keeps back() off an empty
+      // string. Later breaks become NewlineOrSpace unless an empty line has
+      // already terminated Str with '\n'.
+      if (Str.empty())
+        Str.append(LineBreaks, '\n');
+      else if (Str.str().back() != '\n')
+        Str.append(LineBreaks, NewlineOrSpace);
       Str.append(StringRef(LineStart, Current - LineStart));
       LineBreaks = 0;
     }
 
+    if (IsFolded && !Str.empty() && Current != End) {
+      // Handling empty lines in a folded string literal. The slice below
+      // includes the byte at Current, so it must not be taken at End.
+      auto Line = StringRef(LineStart, Current - LineStart + 1);
+      if (isLineBreak(Line))
+        Str.append("\n");
+    }
+
     // Check for EOF.
if (Current == End) break; diff --git a/llvm/test/YAMLParser/spec-09-24.test b/llvm/test/YAMLParser/spec-09-24.test --- a/llvm/test/YAMLParser/spec-09-24.test +++ b/llvm/test/YAMLParser/spec-09-24.test @@ -1,13 +1,52 @@ # RUN: yaml-bench -canonical %s | FileCheck %s # CHECK: ? !!str "strip" -# CHECK: : !!str "" +# CHECK: : !!str "Hello\n\nworld on multiple \n\nlines\nspaces stripped" # CHECK: ? !!str "clip" -# CHECK: : !!str "" +# CHECK: : !!str "\nHello world on multiple\nlines only one space at the \n\nend\n" # CHECK: ? !!str "keep" -# CHECK: : !!str "\n" +# CHECK: : !!str "\nHello\nworld\non\nmultiple\n\n\nlines\nwith space at the end of each\nline\n\n" +# CHECK: ? !!str "example" +# CHECK: : !!str "Several lines of text,\nwith some \"quotes\" of various 'types',\nand also a blank line:\n\nplus another line at the end." strip: >- + Hello + + world + on + multiple + + + lines + + spaces stripped clip: > + Hello + world + on + multiple + + lines + only one space at the + + + end keep: |+ + + Hello + world + on + multiple + + + lines + with space at the end of each + line + +example: |- + Several lines of text, + with some "quotes" of various 'types', + and also a blank line: + + plus another line at the end.