Index: clang/lib/AST/MicrosoftMangle.cpp =================================================================== --- clang/lib/AST/MicrosoftMangle.cpp +++ clang/lib/AST/MicrosoftMangle.cpp @@ -3164,9 +3164,9 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) { - // <char-type> ::= 0 # char - // ::= 1 # wchar_t - // ::= ??? # char16_t/char32_t will need a mangling too... + // <char-type> ::= 0 # char, char16_t, char32_t + // # (little endian char data in mangling) + // ::= 1 # wchar_t (big endian char data in mangling) // // <literal-length> ::= <non-negative integer> # the length of the literal // @@ -3228,8 +3228,8 @@ // scheme. Mangler.mangleNumber(JC.getCRC()); - // <encoded-string>: The mangled name also contains the first 32 _characters_ - // (including null-terminator bytes) of the StringLiteral. + // <encoded-string>: The mangled name also contains the first 32 bytes + // (including null-terminator bytes) of the encoded StringLiteral. // Each character is encoded by splitting them into bytes and then encoding // the constituent bytes. auto MangleByte = [&Mangler](char Byte) { @@ -3258,17 +3258,17 @@ } }; - // Enforce our 32 character max. - unsigned NumCharsToMangle = std::min(32U, SL->getLength()); - for (unsigned I = 0, E = NumCharsToMangle * SL->getCharByteWidth(); I != E; - ++I) + // Enforce our 32 bytes max, except wchar_t which gets 32 chars instead. + unsigned MaxBytesToWrite = SL->isWide() ? 64U : 32U; + unsigned NumBytesToWrite = std::min(MaxBytesToWrite, SL->getByteLength()); + for (unsigned I = 0; I != NumBytesToWrite; ++I) if (SL->isWide()) MangleByte(GetBigEndianByte(I)); else MangleByte(GetLittleEndianByte(I)); // Encode the NUL terminator if there is room. 
- if (NumCharsToMangle < 32) + if (NumBytesToWrite < MaxBytesToWrite) for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); ++NullTerminator) MangleByte(0); Index: clang/test/CodeGenCXX/mangle-ms-string-literals.cpp =================================================================== --- clang/test/CodeGenCXX/mangle-ms-string-literals.cpp +++ clang/test/CodeGenCXX/mangle-ms-string-literals.cpp @@ -719,9 +719,35 @@ // CHECK: @"??_C@_1EK@KFPEBLPK@?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AAA?$AAB@" const wchar_t *UnicodeLiteral = L"\ud7ff"; // CHECK: @"??_C@_13IIHIAFKH@?W?$PP?$AA?$AA@" + const char *U8Literal = u8"hi"; // CHECK: @"??_C@_02PCEFGMJL@hi?$AA@" +const char *LongU8Literal = u8"012345678901234567890123456789ABCDEF"; +// CHECK: @"??_C@_0CF@LABBIIMO@012345678901234567890123456789AB@" + const char16_t *U16Literal = u"hi"; // CHECK: @"??_C@_05OMLEGLOC@h?$AAi?$AA?$AA?$AA@" +// Note this starts with o instead of 0. Else LongWideString would have +// the same initializer and CodeGenModule::ConstantStringMap would map them +// to the same global with a shared mangling. +// FIXME: ConstantStringMap probably shouldn't map things with the same data +// but different manglings to the same variable. +const char16_t *LongU16Literal = u"o12345678901234567890123456789ABCDEF"; +// CHECK: @"??_C@_0EK@FEAOBHPP@o?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA@" + const char32_t *U32Literal = U"hi"; // CHECK: @"??_C@_0M@GFNAJIPG@h?$AA?$AA?$AAi?$AA?$AA?$AA?$AA?$AA?$AA?$AA@" +const char32_t *LongU32Literal = U"012345678901234567890123456789ABCDEF"; +// CHECK: @"??_C@_0JE@IMHFEDAA@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA7?$AA?$AA?$AA@" + +// These all have just the right length that the trailing 0 just fits. 
+const char *MaxASCIIString = "012345678901234567890123456789A"; +// CHECK: @"??_C@_0CA@NMANGEKF@012345678901234567890123456789A?$AA@" +const wchar_t *MaxWideString = L"012345678901234567890123456789A"; +// CHECK: @"??_C@_1EA@LJAFPILO@?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AAA?$AA?$AA@" +const char *MaxU8String = u8"012345678901234567890123456789A"; +// CHECK: @"??_C@_0CA@NMANGEKF@012345678901234567890123456789A?$AA@" +const char16_t *MaxU16String = u"012345678901234"; +// CHECK: @"??_C@_0CA@NFEFHIFO@0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA?$AA?$AA@" +const char32_t *MaxU32String = U"0123456"; +// CHECK: @"??_C@_0CA@KFPHPCC@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA?$AA?$AA?$AA?$AA@"