Index: llvm/lib/Support/CommandLine.cpp
===================================================================
--- llvm/lib/Support/CommandLine.cpp
+++ llvm/lib/Support/CommandLine.cpp
@@ -36,6 +36,13 @@
 #include
 #include
 #include
+
+#ifdef LLVM_ON_WIN32
+#include "Windows/WindowsSupport.h"
+#undef max
+#undef min
+#endif
+
 using namespace llvm;
 using namespace cl;
 
@@ -662,6 +669,65 @@
           S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
 }
 
+#ifdef LLVM_ON_WIN32
+/// Convert a string encoded in the system's ANSI code page (CP_ACP) to UTF-8.
+///
+/// The text is first widened to UTF-16 with MultiByteToWideChar and then
+/// narrowed to UTF-8 with WideCharToMultiByte.
+///
+/// \param SrcBytes the bytes to convert.
+/// \param [out] Out receives the UTF-8 result; must be empty on entry.
+/// \returns true on success (including empty input), false if the input is
+/// too long for the Win32 API or either conversion step fails.
+static bool convertANSIToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
+  assert(Out.empty());
+
+  if (SrcBytes.empty())
+    return true;
+
+  // The Win32 conversion functions take int lengths; reject input whose size
+  // does not survive the narrowing instead of silently converting a prefix.
+  const int SrcLen = static_cast<int>(SrcBytes.size());
+  if (SrcLen < 0 || static_cast<size_t>(SrcLen) != SrcBytes.size())
+    return false;
+
+  // NOTE(review): the inline capacities (128) are an assumption; confirm.
+  SmallVector<wchar_t, 128> utf16;
+  SmallVector<char, 128> utf8;
+
+  // First call only queries the required UTF-16 length; no buffer is written.
+  int len = ::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                                  SrcLen, nullptr, 0);
+  if (len == 0)
+    return false;
+
+  utf16.reserve(len + 1);
+  utf16.set_size(len);
+
+  len = ::MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, SrcBytes.data(),
+                              SrcLen, utf16.begin(), utf16.size());
+  if (len == 0)
+    return false;
+
+  // Same two-step pattern for UTF-16 -> UTF-8: query the size, then convert.
+  len = ::WideCharToMultiByte(CP_UTF8, 0, utf16.begin(), utf16.size(), nullptr,
+                              0, nullptr, nullptr);
+  if (len == 0)
+    return false;
+
+  utf8.reserve(len + 1);
+  utf8.set_size(len);
+
+  len = ::WideCharToMultiByte(CP_UTF8, 0, utf16.begin(), utf16.size(),
+                              utf8.data(), utf8.size(), nullptr, nullptr);
+  if (len == 0)
+    return false;
+
+  Out.assign(utf8.begin(), utf8.end());
+  return true;
+}
+#endif // LLVM_ON_WIN32
+
 static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
                                TokenizerCallback Tokenizer,
                                SmallVectorImpl<const char *> &NewArgv,
@@ -686,6 +752,19 @@
   // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
   else if (hasUTF8ByteOrderMark(BufRef))
     Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
+#ifdef LLVM_ON_WIN32
+  // Otherwise, this might be a hand-written text file encoded in the system's
+  // default code page.
+  else {
+    const UTF8 *Begin = reinterpret_cast<const UTF8 *>(BufRef.begin());
+    if (!isLegalUTF8String(&Begin,
+                           reinterpret_cast<const UTF8 *>(BufRef.end()))) {
+      if (!convertANSIToUTF8String(BufRef, UTF8Buf))
+        return false;
+      Str = StringRef(UTF8Buf);
+    }
+  }
+#endif
 
   // Tokenize the contents into NewArgv.
   Tokenizer(Str, Saver, NewArgv, MarkEOLs);