Fix the bounds check in getUTF8SequenceSize and add lexer test inputs with
multi-byte UTF-8 characters in line and block comments.

The previous condition, `length > sourceEnd - source`, only held when the
decoded sequence length exceeded the number of bytes between `source` and
`sourceEnd`, so sequences that fit in the buffer produced 0. The check must
accept a sequence when it fits, including when its last byte is the last
byte before `sourceEnd` (length == sourceEnd - source), hence `<=` and not
`<`. This assumes `sourceEnd` points one past the last readable byte.

The new test lines carry no expected-warning directive: 2-, 3- and 4-byte
characters inside `//` and `/* */` comments are expected to lex without an
"invalid UTF-8 in comment" diagnostic.

diff --git a/clang/test/Lexer/comment-invalid-utf8.c b/clang/test/Lexer/comment-invalid-utf8.c
--- a/clang/test/Lexer/comment-invalid-utf8.c
+++ b/clang/test/Lexer/comment-invalid-utf8.c
@@ -25,3 +25,14 @@
 // abcd
 // €abcd
 // expected-warning@-1 {{invalid UTF-8 in comment}}
+
+
+//§ § § 😀 你好 ©
+
+/*§ § § 😀 你好 ©*/
+
+/*
+§ § § 😀 你好 ©
+*/
+
+/* § § § 😀 你好 © */
diff --git a/llvm/lib/Support/ConvertUTF.cpp b/llvm/lib/Support/ConvertUTF.cpp
--- a/llvm/lib/Support/ConvertUTF.cpp
+++ b/llvm/lib/Support/ConvertUTF.cpp
@@ -423,6 +423,6 @@
  */
 unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd) {
   int length = trailingBytesForUTF8[*source] + 1;
-  return (length > sourceEnd - source && isLegalUTF8(source, length)) ? length
+  return (length <= sourceEnd - source && isLegalUTF8(source, length)) ? length
                                                                        : 0;
 }