Index: include/llvm/Support/Program.h
===================================================================
--- include/llvm/Support/Program.h
+++ include/llvm/Support/Program.h
@@ -126,6 +126,22 @@
   /// argument length limits.
   bool argumentsFitWithinSystemLimits(ArrayRef Args);
 
+  /// Saves the UTF8-encoded \p contents string into the file \p FileName,
+  /// using the correct encoding and allowing Windows programs to read its
+  /// contents with international characters. This is necessary for Windows
+  /// programs that do not understand UTF-8, e.g., when generating response
+  /// files that link.exe or gcc.exe will read.
+  /// \returns true on success; on failure returns false and, when \p ErrMsg
+  /// is non-null, stores a description of the error in it.
+  /// \li For Windows, it will use the system ANSI code page or UTF-16,
+  /// depending on \p useUTF16. If set, it will encode in UTF-16 and write
+  /// the BOM at the beginning of the file.
+  /// \li For Unix systems, \p useUTF16 is ignored and the contents are
+  /// always written as UTF-8 (no conversion is performed).
+  bool writeFileWithSystemEncoding(const char *FileName, const char *contents,
+                                   std::string *ErrMsg = nullptr,
+                                   bool useUTF16 = false);
+
   /// This function waits for the process specified by \p PI to finish.
   /// \returns A \see ProcessInfo struct with Pid set to:
   /// \li The process id of the child process if the child process has changed
Index: lib/Support/Unix/Program.inc
===================================================================
--- lib/Support/Unix/Program.inc
+++ lib/Support/Unix/Program.inc
@@ -19,6 +19,7 @@
 #include "Unix.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
 #include
 #if HAVE_SYS_STAT_H
 #include
@@ -440,6 +441,33 @@
   return std::error_code();
 }
 
+bool llvm::sys::writeFileWithSystemEncoding(const char *FileName,
+                                            const char *contents,
+                                            std::string *ErrMsg,
+                                            bool useUTF16) {
+  // No re-encoding is performed on Unix: the contents are written as the
+  // UTF-8 bytes that were passed in, so useUTF16 is deliberately ignored.
+  std::string ErrorInfo;
+  llvm::raw_fd_ostream OS(FileName, ErrorInfo,
+                          llvm::sys::fs::OpenFlags::F_Text);
+
+  if (!ErrorInfo.empty()) {
+    if (ErrMsg)
+      *ErrMsg = "could not create file";
+    return false;
+  }
+
+  OS << contents;
+
+  if (OS.has_error()) {
+    if (ErrMsg)
+      *ErrMsg = "could not write to file";
+    return false;
+  }
+
+  return true;
+}
+
 bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef Args) {
   static long ArgMax = sysconf(_SC_ARG_MAX);
 
@@ -448,13 +476,13 @@
     return true;
 
   // Conservatively account for space required by environment variables.
-  ArgMax /= 2;
+  long halfArgMax = ArgMax / 2;
 
   size_t ArgLength = 0;
   for (ArrayRef::iterator I = Args.begin(), E = Args.end();
        I != E; ++I) {
     ArgLength += strlen(*I) + 1;
-    if (ArgLength > size_t(ArgMax)) {
+    if (ArgLength > size_t(halfArgMax)) {
       return false;
     }
   }
Index: lib/Support/Windows/Path.inc
===================================================================
--- lib/Support/Windows/Path.inc
+++ lib/Support/Windows/Path.inc
@@ -891,11 +891,15 @@
   return std::error_code();
 }
 
-std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
-                            llvm::SmallVectorImpl &utf8) {
+/// Converts \p utf16_len UTF-16 code units at \p utf16 into the multi-byte
+/// encoding selected by \p codepage, storing the result in \p utf8.
+static
+std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
+                                size_t utf16_len,
+                                llvm::SmallVectorImpl &utf8) {
   if (utf16_len) {
     // Get length.
-    int len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.begin(),
+    int len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.begin(),
                                     0, NULL, NULL);
 
     if (len == 0)
@@ -905,7 +909,7 @@
     utf8.set_size(len);
 
     // Now do the actual conversion.
-    len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.data(),
+    len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.data(),
                                 utf8.size(), NULL, NULL);
 
     if (len == 0)
@@ -918,6 +922,16 @@
 
   return std::error_code();
 }
+
+std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+                            llvm::SmallVectorImpl &utf8) {
+  return UTF16ToCodePage(CP_UTF8, utf16, utf16_len, utf8);
+}
+
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+                             llvm::SmallVectorImpl &utf8) {
+  return UTF16ToCodePage(CP_ACP, utf16, utf16_len, utf8);
+}
 } // end namespace windows
 } // end namespace sys
 } // end namespace llvm
Index: lib/Support/Windows/Program.inc
===================================================================
--- lib/Support/Windows/Program.inc
+++ lib/Support/Windows/Program.inc
@@ -12,7 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "WindowsSupport.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
 #include
 #include
 #include
@@ -166,19 +168,7 @@
 
 }
 
-static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
-                    const char **envp, const StringRef **redirects,
-                    unsigned memoryLimit, std::string *ErrMsg) {
-  if (!sys::fs::can_execute(Program)) {
-    if (ErrMsg)
-      *ErrMsg = "program not executable";
-    return false;
-  }
-
-  // Windows wants a command line, not an array of args, to pass to the new
-  // process. We have to concatenate them all, while quoting the args that
-  // have embedded spaces (or are empty).
-
+static std::unique_ptr flattenArgs(const char **args) {
   // First, determine the length of the command line.
   unsigned len = 0;
   for (unsigned i = 0; args[i]; i++) {
@@ -216,6 +206,22 @@
   }
 
   *p = 0;
+  return command;
+}
+
+static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
+                    const char **envp, const StringRef **redirects,
+                    unsigned memoryLimit, std::string *ErrMsg) {
+  if (!sys::fs::can_execute(Program)) {
+    if (ErrMsg)
+      *ErrMsg = "program not executable";
+    return false;
+  }
+
+  // Windows wants a command line, not an array of args, to pass to the new
+  // process. We have to concatenate them all, while quoting the args that
+  // have embedded spaces (or are empty).
+  std::unique_ptr command = flattenArgs(args);
 
   // The pointer to the environment block for the new process.
   std::vector EnvBlock;
@@ -436,6 +442,62 @@
   return std::error_code();
 }
 
+bool llvm::sys::writeFileWithSystemEncoding(const char *FileName,
+                                            const char *contents,
+                                            std::string *ErrMsg,
+                                            bool useUTF16) {
+  // UTF-16 output is raw binary data: text mode would translate every 0x0A
+  // byte into 0x0D 0x0A and corrupt the 16-bit code units.
+  std::string ErrorInfo;
+  llvm::raw_fd_ostream OS(FileName, ErrorInfo,
+                          useUTF16 ? llvm::sys::fs::OpenFlags::F_None
+                                   : llvm::sys::fs::OpenFlags::F_Text);
+
+  if (!ErrorInfo.empty()) {
+    if (ErrMsg)
+      *ErrMsg = "could not create file";
+    return false;
+  }
+
+  // Both output encodings start from the UTF-16 form of the input.
+  SmallVector ArgsUTF16;
+  if (std::error_code ec = windows::UTF8ToUTF16(contents, ArgsUTF16)) {
+    if (ErrMsg)
+      *ErrMsg = "unable to convert to UTF-16";
+    SetLastError(ec.value());
+    return false;
+  }
+
+  if (!useUTF16) {
+    SmallVector ArgsCurCP;
+    if (std::error_code ec = windows::UTF16ToCurCP(
+            ArgsUTF16.data(), ArgsUTF16.size(), ArgsCurCP)) {
+      if (ErrMsg)
+        *ErrMsg = "unable to convert to current code page";
+      SetLastError(ec.value());
+      return false;
+    }
+
+    OS.write(ArgsCurCP.data(), ArgsCurCP.size());
+  } else {
+    // Emit the BOM in native byte order so readers can detect endianness.
+    char BOM[2];
+    uint16_t src = UNI_UTF16_BYTE_ORDER_MARK_NATIVE;
+    memcpy(BOM, &src, 2);
+    OS.write(BOM, 2);
+    OS.write(reinterpret_cast<const char *>(ArgsUTF16.data()),
+             ArgsUTF16.size() * sizeof(wchar_t));
+  }
+
+  if (OS.has_error()) {
+    if (ErrMsg)
+      *ErrMsg = "could not write to file";
+    return false;
+  }
+
+  return true;
+}
+
 bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef Args) {
   // The documented max length of the command line passed to CreateProcess.
   static const size_t MaxCommandStringLength = 32768;
Index: lib/Support/Windows/WindowsSupport.h
===================================================================
--- lib/Support/Windows/WindowsSupport.h
+++ lib/Support/Windows/WindowsSupport.h
@@ -166,6 +166,9 @@
 std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl &utf16);
 std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
                             SmallVectorImpl &utf8);
+/// Convert from UTF16 to the current code page used in the system
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+                             SmallVectorImpl &utf8);
 } // end namespace windows
 } // end namespace sys
 } // end namespace llvm.
Index: unittests/Support/ProgramTest.cpp
===================================================================
--- unittests/Support/ProgramTest.cpp
+++ unittests/Support/ProgramTest.cpp
@@ -34,6 +34,16 @@
 #error sleep_for is not implemented on your platform.
 #endif
 
+#define ASSERT_NO_ERROR(x)                                                     \
+  if (std::error_code ASSERT_NO_ERROR_ec = x) {                                \
+    SmallString<128> MessageStorage;                                           \
+    raw_svector_ostream Message(MessageStorage);                               \
+    Message << #x ": did not return errc::success.\n"                          \
+            << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n"          \
+            << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n";      \
+    GTEST_FATAL_FAILURE_(MessageStorage.c_str());                              \
+  } else {                                                                     \
+  }
 // From TestMain.cpp.
 extern const char *TestMainArgv0;
 
@@ -220,4 +230,47 @@
 }
 
+#ifdef LLVM_ON_WIN32
+const char utf16le_text[] =
+    "\x6c\x00\x69\x00\x6e\x00\x67\x00\xfc\x00\x69\x00\xe7\x00\x61\x00";
+const char utf16be_text[] =
+    "\x00\x6c\x00\x69\x00\x6e\x00\x67\x00\xfc\x00\x69\x00\xe7\x00\x61";
+#endif
+const char utf8_text[] = "\x6c\x69\x6e\x67\xc3\xbc\x69\xc3\xa7\x61";
+
+TEST(ProgramTest, TestWriteWithSystemEncoding) {
+  SmallString<128> TestDirectory;
+  ASSERT_NO_ERROR(fs::createUniqueDirectory("program-test", TestDirectory));
+  errs() << "Test Directory: " << TestDirectory << '\n';
+  errs().flush();
+  SmallString<128> file_pathname(TestDirectory);
+  path::append(file_pathname, "international-file.txt");
+  std::string ErrMsg;
+  ASSERT_TRUE(sys::writeFileWithSystemEncoding(file_pathname.c_str(), utf8_text,
+                                               &ErrMsg, true));
+  int fd = 0;
+  ASSERT_NO_ERROR(fs::openFileForRead(file_pathname.c_str(), fd));
+// Only on Windows we should expect the file to be encoded in UTF16. For other
+// systems, expect it to be in UTF8. The UTF-16 data contains NUL bytes, so
+// the buffers are compared with memcmp: strncmp would stop at the first NUL.
+#ifdef LLVM_ON_WIN32
+  char buf[18];
+  ASSERT_EQ(::read(fd, buf, 18), 18);
+  if (memcmp(buf, "\xfe\xff", 2) == 0) { // UTF16-BE
+    ASSERT_EQ(memcmp(&buf[2], utf16be_text, 16), 0);
+  } else if (memcmp(buf, "\xff\xfe", 2) == 0) { // UTF16-LE
+    ASSERT_EQ(memcmp(&buf[2], utf16le_text, 16), 0);
+  } else {
+    FAIL() << "Invalid BOM in UTF-16 file";
+  }
+#else
+  char buf[10];
+  ASSERT_EQ(::read(fd, buf, 10), 10);
+  ASSERT_EQ(memcmp(buf, utf8_text, 10), 0);
+#endif
+  ::close(fd);
+  ASSERT_NO_ERROR(fs::remove(Twine(file_pathname)));
+  ASSERT_NO_ERROR(fs::remove(TestDirectory.str()));
+}
+
 
 } // end anonymous namespace