Index: include/clang/AST/Expr.h =================================================================== --- include/clang/AST/Expr.h +++ include/clang/AST/Expr.h @@ -1292,6 +1292,7 @@ enum CharacterKind { Ascii, Wide, + UTF8, UTF16, UTF32 }; Index: include/clang/AST/Stmt.h =================================================================== --- include/clang/AST/Stmt.h +++ include/clang/AST/Stmt.h @@ -130,7 +130,7 @@ friend class CharacterLiteral; unsigned : NumExprBits; - unsigned Kind : 2; + unsigned Kind : 3; }; enum APFloatSemantics { Index: include/clang/Lex/LiteralSupport.h =================================================================== --- include/clang/Lex/LiteralSupport.h +++ include/clang/Lex/LiteralSupport.h @@ -166,6 +166,7 @@ bool hadError() const { return HadError; } bool isAscii() const { return Kind == tok::char_constant; } bool isWide() const { return Kind == tok::wide_char_constant; } + bool isUTF8() const { return Kind == tok::utf8_char_constant; } bool isUTF16() const { return Kind == tok::utf16_char_constant; } bool isUTF32() const { return Kind == tok::utf32_char_constant; } bool isMultiChar() const { return IsMultiChar; } Index: lib/AST/StmtPrinter.cpp =================================================================== --- lib/AST/StmtPrinter.cpp +++ lib/AST/StmtPrinter.cpp @@ -1165,6 +1165,7 @@ switch (Node->getKind()) { case CharacterLiteral::Ascii: break; // no prefix. case CharacterLiteral::Wide: OS << 'L'; break; + case CharacterLiteral::UTF8: OS << "u8"; break; case CharacterLiteral::UTF16: OS << 'u'; break; case CharacterLiteral::UTF32: OS << 'U'; break; } Index: lib/Lex/LiteralSupport.cpp =================================================================== --- lib/Lex/LiteralSupport.cpp +++ lib/Lex/LiteralSupport.cpp @@ -983,6 +983,7 @@ /// u' c-char-sequence ' /// U' c-char-sequence ' /// L' c-char-sequence ' +/// u8' c-char-sequence ' [C++1z lex.ccon] /// c-char-sequence: /// c-char /// c-char-sequence c-char Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -3084,6 +3084,8 @@ Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; + else if (Literal.isUTF8()) + Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); Index: lib/Sema/SemaExprObjC.cpp =================================================================== --- lib/Sema/SemaExprObjC.cpp +++ lib/Sema/SemaExprObjC.cpp @@ -319,6 +319,7 @@ // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: NumberType = Context.CharTy; break; @@ -577,6 +578,7 @@ // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: ValueType = Context.CharTy; break; Index: lib/Sema/SemaTemplate.cpp =================================================================== --- lib/Sema/SemaTemplate.cpp +++ lib/Sema/SemaTemplate.cpp @@ -5503,6 +5503,8 @@ Expr *E; if (T->isAnyCharacterType()) { + // This does not need to handle u8 character literals because those are + // of type char, and so can also be covered by an Ascii character literal. CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; Index: test/Misc/ast-print-char-literal.cpp =================================================================== --- test/Misc/ast-print-char-literal.cpp +++ test/Misc/ast-print-char-literal.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -ast-print -std=c++1z %s -o - | FileCheck %s + +char c = u8'1'; +char d = '1'; +char e = U'1'; +char f = L'1'; +char g = u'1'; + +template +void h(); + +void i() { + h(); +} + +// CHECK: char c = u8'1'; +// CHECK-NEXT: char d = '1'; +// CHECK-NEXT: char e = U'1'; +// CHECK-NEXT: char f = L'1'; +// CHECK-NEXT: char g = u'1'; + +// CHECK: template + +// CHECK: h();