Index: llvm/trunk/test/tools/llvm-rc/Inputs/tag-escape.rc =================================================================== --- llvm/trunk/test/tools/llvm-rc/Inputs/tag-escape.rc +++ llvm/trunk/test/tools/llvm-rc/Inputs/tag-escape.rc @@ -0,0 +1,70 @@ +STRINGTABLE { + 0 "Hello!" + 1 "\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z" + 2 "\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z" + 3 "\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g" + 4 "\0\01\010\0100\01000\010000\0100000\01000000" + 5 "\1\11\111\1111\11111\111111\1111111\11111111" + 6 """\""\""\\\\\\" + 7 "a\0b\0\0c\0\0\0d\0" + 8 "a\0b\0" + + 20 L"Hello!" + 21 L"\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z" + 22 L"\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z" + 23 L"\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g" + 24 L"\0\01\010\0100\01000\010000\0100000\01000000" + 25 L"\1\11\111\1111\11111\111111\1111111\11111111" + 26 L"""\""\""\\\\\\" + 27 L"a\0b\0\0c\0\0\0d\0" + 28 L"a\0b\0" +} + +1 MENU { + MENUITEM "Hello!", 0 + MENUITEM "\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z", 1 + MENUITEM "\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z", 2 + MENUITEM "\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g", 3 + MENUITEM "\0\01\010\0100\01000\010000\0100000\01000000", 4 + MENUITEM "\1\11\111\1111\11111\111111\1111111\11111111", 5 + MENUITEM """\""\""\\\\\\", 6 + MENUITEM "a\0b\0\0c\0\0\0d\0", 7 + MENUITEM "a\0b\0", 8 +} + +2 MENU { + MENUITEM L"Hello!", 0 + MENUITEM L"\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z", 1 + MENUITEM L"\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z", 2 + MENUITEM L"\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g", 3 + MENUITEM L"\0\01\010\0100\01000\010000\0100000\01000000", 4 + MENUITEM L"\1\11\111\1111\11111\111111\1111111\11111111", 5 + MENUITEM L"""\""\""\\\\\\", 6 + MENUITEM L"a\0b\0\0c\0\0\0d\0", 7 + MENUITEM L"a\0b\0", 8 +} + +500 USERDEFINED { + "Hello!", + 
"\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z", + "\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z", + "\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g", + "\0\01\010\0100\01000\010000\0100000\01000000", + "\1\11\111\1111\11111\111111\1111111\11111111", + """\""\""\\\\\\", + "a\0b\0\0c\0\0\0d\0", + "a\0b\0" +} + +501 USERDEFINED { + L"Hello!", + L"\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z", + L"\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z", + L"\x\x1\x12\x123\x1234\x12345\X\X1\X12\X123\X1234\X12345\x1g", + L"\0\01\010\0100\01000\010000\0100000\01000000", + L"\1\11\111\1111\11111\111111\1111111\11111111", + L"""\""\""\\\\\\", + L"a\0b\0\0c\0\0\0d\0", + L"a\0b\0" +} + Index: llvm/trunk/test/tools/llvm-rc/tag-escape.test =================================================================== --- llvm/trunk/test/tools/llvm-rc/tag-escape.test +++ llvm/trunk/test/tools/llvm-rc/tag-escape.test @@ -0,0 +1,160 @@ +; RUN: llvm-rc /FO %t %p/Inputs/tag-escape.rc +; RUN: llvm-readobj %t | FileCheck %s + +; CHECK: Resource type (int): 4 +; CHECK-NEXT: Resource name (int): 1 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x1030 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version (minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 304 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 00000000 00000000 48006500 6C006C00 |........H.e.l.l.| +; CHECK-NEXT: 0010: 6F002100 00000000 01000800 5C006200 |o.!.........\.b.| +; CHECK-NEXT: 0020: 5C006300 5C006400 5C006500 5C006600 |\.c.\.d.\.e.\.f.| +; CHECK-NEXT: 0030: 5C006700 5C006800 5C006900 5C006A00 |\.g.\.h.\.i.\.j.| +; CHECK-NEXT: 0040: 5C006B00 5C006C00 5C006D00 0A005C00 |\.k.\.l.\.m...\.| +; CHECK-NEXT: 0050: 6F005C00 70005C00 71000D00 5C007300 |o.\.p.\.q...\.s.| +; CHECK-NEXT: 0060: 09005C00 75005C00 76005C00 77000000 |..\.u.\.v.\.w...| +; CHECK-NEXT: 0070: 00000200 08005C00 42005C00 43005C00 |......\.B.\.C.\.| +; CHECK-NEXT: 0080: 
44005C00 45005C00 46005C00 47005C00 |D.\.E.\.F.\.G.\.| +; CHECK-NEXT: 0090: 48005C00 49005C00 4A005C00 4B005C00 |H.\.I.\.J.\.K.\.| +; CHECK-NEXT: 00A0: 4C005C00 4D005C00 4E005C00 4F005C00 |L.\.M.\.N.\.O.\.| +; CHECK-NEXT: 00B0: 50005C00 51005C00 52005C00 53000900 |P.\.Q.\.R.\.S...| +; CHECK-NEXT: 00C0: 5C005500 5C005600 5C005700 00000000 |\.U.\.V.\.W.....| +; CHECK-NEXT: 00D0: 03000000 00000400 00000000 05000100 |................| +; CHECK-NEXT: 00E0: 09004900 49003100 49003100 31004900 |..I.I.1.I.1.1.I.| +; CHECK-NEXT: 00F0: 31003100 31004900 31003100 31003100 |1.1.1.I.1.1.1.1.| +; CHECK-NEXT: 0100: 49003100 31003100 31003100 00000000 |I.1.1.1.1.1.....| +; CHECK-NEXT: 0110: 06002200 22002200 5C005C00 5C000000 |..".".".\.\.\...| +; CHECK-NEXT: 0120: 00000700 61000000 80000800 61000000 |....a.......a...| +; CHECK-NEXT: ) + +; CHECK-DAG: Resource type (int): 4 +; CHECK-NEXT: Resource name (int): 2 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x1030 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version (minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 116 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 00000000 00000000 48006500 6C006C00 |........H.e.l.l.| +; CHECK-NEXT: 0010: 6F002100 00000000 01000800 0A000D00 |o.!.............| +; CHECK-NEXT: 0020: 09000000 00000200 08000900 00000000 |................| +; CHECK-NEXT: 0030: 03000000 00000400 00000000 05000100 |................| +; CHECK-NEXT: 0040: 09004900 49024912 49924992 49923100 |..I.I.I.I.I.I.1.| +; CHECK-NEXT: 0050: 00000000 06002200 22002200 5C005C00 |......".".".\.\.| +; CHECK-NEXT: 0060: 5C000000 00000700 61000000 80000800 |\.......a.......| +; CHECK-NEXT: 0070: 61000000 |a...| +; CHECK-NEXT: ) + +; CHECK-DAG: Resource type (string): USERDEFINED +; CHECK-NEXT: Resource name (int): 500 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x30 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version 
(minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 195 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 48656C6C 6F21085C 625C635C 645C655C |Hello!.\b\c\d\e\| +; CHECK-NEXT: 0010: 665C675C 685C695C 6A5C6B5C 6C5C6D0A |f\g\h\i\j\k\l\m.| +; CHECK-NEXT: 0020: 5C6F5C70 5C710D5C 73095C75 5C765C77 |\o\p\q.\s.\u\v\w| +; CHECK-NEXT: 0030: 005C795C 7A085C42 5C435C44 5C455C46 |.\y\z.\B\C\D\E\F| +; CHECK-NEXT: 0040: 5C475C48 5C495C4A 5C4B5C4C 5C4D5C4E |\G\H\I\J\K\L\M\N| +; CHECK-NEXT: 0050: 5C4F5C50 5C515C52 5C53095C 555C565C |\O\P\Q\R\S.\U\V\| +; CHECK-NEXT: 0060: 57005C59 5C5A0001 12123312 33341233 |W.\Y\Z....3.34.3| +; CHECK-NEXT: 0070: 34350001 12123312 33341233 34350167 |45....3.34.345.g| +; CHECK-NEXT: 0080: 00010808 30083030 08303030 08303030 |....0.00.000.000| +; CHECK-NEXT: 0090: 30083030 30303001 09494931 49313149 |0.00000..II1I11I| +; CHECK-NEXT: 00A0: 31313149 31313131 49313131 31312222 |111I1111I11111""| +; CHECK-NEXT: 00B0: 225C5C5C 61006200 00630000 00640061 |"\\\a.b..c...d.a| +; CHECK-NEXT: 00C0: 006200 |.b.| +; CHECK-NEXT: ) + +; CHECK-DAG: Resource type (string): USERDEFINED +; CHECK-NEXT: Resource name (int): 501 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x30 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version (minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 138 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 48006500 6C006C00 6F002100 08000A00 |H.e.l.l.o.!.....| +; CHECK-NEXT: 0010: 0D000900 00000800 09000000 00000100 |................| +; CHECK-NEXT: 0020: 12002301 34123412 35000000 01001200 |..#.4.4.5.......| +; CHECK-NEXT: 0030: 23013412 34123500 01006700 00000100 |#.4.4.5...g.....| +; CHECK-NEXT: 0040: 08004000 00020010 00800080 30000100 |..@.........0...| +; CHECK-NEXT: 0050: 09004900 49024912 49924992 49923100 |..I.I.I.I.I.I.1.| +; CHECK-NEXT: 0060: 22002200 22005C00 5C005C00 61000000 |".".".\.\.\.a...| +; CHECK-NEXT: 0070: 62000000 00006300 00000000 00006400 
|b.....c.......d.| +; CHECK-NEXT: 0080: 00006100 00006200 0000 |..a...b...| +; CHECK-NEXT: ) + +; CHECK-DAG: Resource type (int): 6 +; CHECK-NEXT: Resource name (int): 1 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x1030 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version (minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 404 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 06004800 65006C00 6C006F00 21002F00 |..H.e.l.l.o.!./.| +; CHECK-NEXT: 0010: 08005C00 62005C00 63005C00 64005C00 |..\.b.\.c.\.d.\.| +; CHECK-NEXT: 0020: 65005C00 66005C00 67005C00 68005C00 |e.\.f.\.g.\.h.\.| +; CHECK-NEXT: 0030: 69005C00 6A005C00 6B005C00 6C005C00 |i.\.j.\.k.\.l.\.| +; CHECK-NEXT: 0040: 6D000A00 5C006F00 5C007000 5C007100 |m...\.o.\.p.\.q.| +; CHECK-NEXT: 0050: 0D005C00 73000900 5C007500 5C007600 |..\.s...\.u.\.v.| +; CHECK-NEXT: 0060: 5C007700 00005C00 79005C00 7A003100 |\.w...\.y.\.z.1.| +; CHECK-NEXT: 0070: 08005C00 42005C00 43005C00 44005C00 |..\.B.\.C.\.D.\.| +; CHECK-NEXT: 0080: 45005C00 46005C00 47005C00 48005C00 |E.\.F.\.G.\.H.\.| +; CHECK-NEXT: 0090: 49005C00 4A005C00 4B005C00 4C005C00 |I.\.J.\.K.\.L.\.| +; CHECK-NEXT: 00A0: 4D005C00 4E005C00 4F005C00 50005C00 |M.\.N.\.O.\.P.\.| +; CHECK-NEXT: 00B0: 51005C00 52005C00 53000900 5C005500 |Q.\.R.\.S...\.U.| +; CHECK-NEXT: 00C0: 5C005600 5C005700 00005C00 59005C00 |\.V.\.W...\.Y.\.| +; CHECK-NEXT: 00D0: 5A001A00 00000100 12001200 33001200 |Z...........3...| +; CHECK-NEXT: 00E0: 33003400 12003300 34003500 00000100 |3.4...3.4.5.....| +; CHECK-NEXT: 00F0: 12001200 33001200 33003400 12003300 |....3...3.4...3.| +; CHECK-NEXT: 0100: 34003500 01006700 17000000 01000800 |4.5...g.........| +; CHECK-NEXT: 0110: 08003000 08003000 30000800 30003000 |..0...0.0...0.0.| +; CHECK-NEXT: 0120: 30000800 30003000 30003000 08003000 |0...0.0.0.0...0.| +; CHECK-NEXT: 0130: 30003000 30003000 17000100 09004900 |0.0.0.0.......I.| +; CHECK-NEXT: 0140: 49003100 49003100 31004900 
31003100 |I.1.I.1.1.I.1.1.| +; CHECK-NEXT: 0150: 31004900 31003100 31003100 49003100 |1.I.1.1.1.1.I.1.| +; CHECK-NEXT: 0160: 31003100 31003100 06002200 22002200 |1.1.1.1...".".".| +; CHECK-NEXT: 0170: 5C005C00 5C000300 61000000 62000300 |\.\.\...a...b...| +; CHECK-NEXT: 0180: 61000000 62000000 00000000 00000000 |a...b...........| +; CHECK-NEXT: 0190: 00000000 |....| +; CHECK-NEXT: ) + +; CHECK-DAG: Resource type (int): 6 +; CHECK-NEXT: Resource name (int): 2 +; CHECK-NEXT: Data version: 0 +; CHECK-NEXT: Memory flags: 0x1030 +; CHECK-NEXT: Language ID: 1033 +; CHECK-NEXT: Version (major): 0 +; CHECK-NEXT: Version (minor): 0 +; CHECK-NEXT: Characteristics: 0 +; CHECK-NEXT: Data size: 148 +; CHECK-NEXT: Data: ( +; CHECK-NEXT: 0000: 00000000 00000000 06004800 65006C00 |..........H.e.l.| +; CHECK-NEXT: 0010: 6C006F00 21000400 08000A00 0D000900 |l.o.!...........| +; CHECK-NEXT: 0020: 02000800 09001000 00000100 12002301 |..............#.| +; CHECK-NEXT: 0030: 34123412 35000000 01001200 23013412 |4.4.5.......#.4.| +; CHECK-NEXT: 0040: 34123500 01006700 09000000 01000800 |4.5...g.........| +; CHECK-NEXT: 0050: 40000002 00100080 00803000 09000100 |@.........0.....| +; CHECK-NEXT: 0060: 09004900 49024912 49924992 49923100 |..I.I.I.I.I.I.1.| +; CHECK-NEXT: 0070: 06002200 22002200 5C005C00 5C000300 |..".".".\.\.\...| +; CHECK-NEXT: 0080: 61000000 62000300 61000000 62000000 |a...b...a...b...| +; CHECK-NEXT: 0090: 00000000 |....| +; CHECK-NEXT: ) + Index: llvm/trunk/tools/llvm-rc/ResourceFileWriter.cpp =================================================================== --- llvm/trunk/tools/llvm-rc/ResourceFileWriter.cpp +++ llvm/trunk/tools/llvm-rc/ResourceFileWriter.cpp @@ -122,24 +122,175 @@ CutAtDoubleNull // Terminate string on '\0\0'; strip final '\0'. }; -// Parses an identifier or string and returns a processed version of it. 
-// For now, it only strips the string boundaries, but TODO: +// Parses an identifier or string and returns a processed version of it: +// * Strip the string boundary quotes. // * Squash "" to a single ". // * Replace the escape sequences with their processed version. // For identifiers, this is no-op. static Error processString(StringRef Str, NullHandlingMethod NullHandler, bool &IsLongString, SmallVectorImpl &Result) { bool IsString = stripQuotes(Str, IsLongString); - convertUTF8ToUTF16String(Str, Result); + SmallVector Chars; + convertUTF8ToUTF16String(Str, Chars); if (!IsString) { // It's an identifier if it's not a string. Make all characters uppercase. - for (UTF16 &Ch : Result) { + for (UTF16 &Ch : Chars) { assert(Ch <= 0x7F && "We didn't allow identifiers to be non-ASCII"); Ch = toupper(Ch); } + Result.swap(Chars); return Error::success(); } + Result.reserve(Chars.size()); + size_t Pos = 0; + + auto AddRes = [&Result, NullHandler, IsLongString](UTF16 Char) -> Error { + if (!IsLongString) { + if (NullHandler == NullHandlingMethod::UserResource) { + // Narrow strings in user-defined resources are *not* output in + // UTF-16 format. + if (Char > 0xFF) + return createError("Non-8-bit codepoint (" + Twine(Char) + + ") can't occur in a user-defined narrow string"); + + } else { + // In case of narrow non-user strings, Windows RC converts + // [0x80, 0xFF] chars according to the current codepage. + // There is no 'codepage' concept settled in every supported platform, + // so we should reject such inputs. + if (Char > 0x7F && Char <= 0xFF) + return createError("Non-ASCII 8-bit codepoint (" + Twine(Char) + + ") can't " + "occur in a non-Unicode string"); + } + } + + Result.push_back(Char); + return Error::success(); + }; + + while (Pos < Chars.size()) { + UTF16 CurChar = Chars[Pos]; + ++Pos; + + // Strip double "". 
+ if (CurChar == '"') { + if (Pos == Chars.size() || Chars[Pos] != '"') + return createError("Expected \"\""); + ++Pos; + RETURN_IF_ERROR(AddRes('"')); + continue; + } + + if (CurChar == '\\') { + UTF16 TypeChar = Chars[Pos]; + ++Pos; + + if (TypeChar == 'x' || TypeChar == 'X') { + // Read a hex number. Max number of characters to read differs between + // narrow and wide strings. + UTF16 ReadInt = 0; + size_t RemainingChars = IsLongString ? 4 : 2; + // We don't want to read non-ASCII hex digits. std:: functions past + // 0xFF invoke UB. + // + // FIXME: actually, Microsoft version probably doesn't check this + // condition and uses their Unicode version of 'isxdigit'. However, + // there are some hex-digit Unicode character outside of ASCII, and + // some of these are actually accepted by rc.exe, the notable example + // being fullwidth forms (U+FF10..U+FF19 etc.) These can be written + // instead of ASCII digits in \x... escape sequence and get accepted. + // However, the resulting hexcodes seem totally unpredictable. + // We think it's infeasible to try to reproduce this behavior, nor to + // put effort in order to detect it. + while (RemainingChars && Pos < Chars.size() && Chars[Pos] < 0x80) { + if (!isxdigit(Chars[Pos])) + break; + char Digit = tolower(Chars[Pos]); + ++Pos; + + ReadInt <<= 4; + if (isdigit(Digit)) + ReadInt |= Digit - '0'; + else + ReadInt |= Digit - 'a' + 10; + + --RemainingChars; + } + + RETURN_IF_ERROR(AddRes(ReadInt)); + continue; + } + + if (TypeChar >= '0' && TypeChar < '8') { + // Read an octal number. Note that we've already read the first digit. + UTF16 ReadInt = TypeChar - '0'; + size_t RemainingChars = IsLongString ? 
6 : 2; + + while (RemainingChars && Pos < Chars.size() && Chars[Pos] >= '0' && + Chars[Pos] < '8') { + ReadInt <<= 3; + ReadInt |= Chars[Pos] - '0'; + --RemainingChars; + ++Pos; + } + + RETURN_IF_ERROR(AddRes(ReadInt)); + + continue; + } + + switch (TypeChar) { + case 'A': + case 'a': + // Windows '\a' translates into '\b' (Backspace). + RETURN_IF_ERROR(AddRes('\b')); + break; + + case 'n': // Somehow, RC doesn't recognize '\N' and '\R'. + RETURN_IF_ERROR(AddRes('\n')); + break; + + case 'r': + RETURN_IF_ERROR(AddRes('\r')); + break; + + case 'T': + case 't': + RETURN_IF_ERROR(AddRes('\t')); + break; + + case '\\': + RETURN_IF_ERROR(AddRes('\\')); + break; + + case '"': + // RC accepts \" only if another " comes afterwards; then, \"" means + // a single ". + if (Pos == Chars.size() || Chars[Pos] != '"') + return createError("Expected \\\"\""); + ++Pos; + RETURN_IF_ERROR(AddRes('"')); + break; + + default: + // If TypeChar means nothing, \ should be output to the result with + // the following char. However, rc.exe consumes these characters when + // dealing with wide strings. + if (!IsLongString) { + RETURN_IF_ERROR(AddRes('\\')); + RETURN_IF_ERROR(AddRes(TypeChar)); + } + break; + } + + continue; + } + + // If nothing interesting happens, just output the character. + RETURN_IF_ERROR(AddRes(CurChar)); + } switch (NullHandler) { case NullHandlingMethod::CutAtNull: Index: llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp =================================================================== --- llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp +++ llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp @@ -219,7 +219,10 @@ } else if (Data[Pos] == '"') { // Consume the ending double-quote. advance(); - return Error::success(); + // However, if another '"' follows this double-quote, the string didn't + // end and we just included '"' into the string. 
+ if (!willNowRead("\"")) + return Error::success(); } else if (Data[Pos] == '\n') { return getStringError("String literal not terminated in the line."); }