Index: llvm/trunk/include/llvm/Support/raw_ostream.h =================================================================== --- llvm/trunk/include/llvm/Support/raw_ostream.h +++ llvm/trunk/include/llvm/Support/raw_ostream.h @@ -367,12 +367,16 @@ int FD; bool ShouldClose; + bool SupportsSeeking; + + /// True if this fd refers to a Windows console device. Mintty and other + /// terminal emulators are TTYs, but they are not consoles. + bool IsWindowsConsole = false; + std::error_code EC; uint64_t pos; - bool SupportsSeeking; - /// See raw_ostream::write_impl. void write_impl(const char *Ptr, size_t Size) override; Index: llvm/trunk/lib/Support/Locale.cpp =================================================================== --- llvm/trunk/lib/Support/Locale.cpp +++ llvm/trunk/lib/Support/Locale.cpp @@ -7,24 +7,11 @@ namespace locale { int columnWidth(StringRef Text) { -#ifdef _WIN32 - return Text.size(); -#else return llvm::sys::unicode::columnWidthUTF8(Text); -#endif } bool isPrint(int UCS) { -#ifdef _WIN32 - // Restrict characters that we'll try to print to the lower part of ASCII - // except for the control characters (0x20 - 0x7E). In general one can not - // reliably output code points U+0080 and higher using narrow character C/C++ - // output functions in Windows, because the meaning of the upper 128 codes is - // determined by the active code page in the console. - return ' ' <= UCS && UCS <= '~'; -#else return llvm::sys::unicode::isPrintable(UCS); -#endif } } // namespace locale Index: llvm/trunk/lib/Support/SourceMgr.cpp =================================================================== --- llvm/trunk/lib/Support/SourceMgr.cpp +++ llvm/trunk/lib/Support/SourceMgr.cpp @@ -345,12 +345,18 @@ static void printSourceLine(raw_ostream &S, StringRef LineContents) { // Print out the source line one character at a time, so we can expand tabs. for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { - if (LineContents[i] != '\t') { - S << LineContents[i]; - ++OutCol; - continue; + size_t NextTab = LineContents.find('\t', i); + // If there were no tabs left, print the rest, we are done. + if (NextTab == StringRef::npos) { + S << LineContents.drop_front(i); + break; } + // Otherwise, print from i to NextTab. + S << LineContents.slice(i, NextTab); + OutCol += NextTab - i; + i = NextTab; + // If we have a tab, emit at least one space, then round up to 8 columns. do { S << ' '; Index: llvm/trunk/lib/Support/raw_ostream.cpp =================================================================== --- llvm/trunk/lib/Support/raw_ostream.cpp +++ llvm/trunk/lib/Support/raw_ostream.cpp @@ -60,6 +60,7 @@ #endif #ifdef _WIN32 +#include "llvm/Support/ConvertUTF.h" #include "Windows/WindowsSupport.h" #endif @@ -567,6 +568,12 @@ if (FD <= STDERR_FILENO) ShouldClose = false; +#ifdef _WIN32 + // Check if this is a console device. This is not equivalent to isatty. + IsWindowsConsole = + ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#endif + // Get the starting position. off_t loc = ::lseek(FD, 0, SEEK_CUR); #ifdef _WIN32 @@ -609,10 +616,68 @@ /*GenCrashDiag=*/false); } +#if defined(_WIN32) +// The most reliable way to print unicode in a Windows console is with +// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This +// assumes that LLVM programs always print valid UTF-8 to the console. The data +// might not be UTF-8 for two major reasons: +// 1. The program is printing binary (-filetype=obj -o -), in which case it +// would have been gibberish anyway. +// 2. The program is printing text in a semi-ascii compatible codepage like +// shift-jis or cp1252. +// +// Most LLVM programs don't produce non-ascii text unless they are quoting +// user source input. A well-behaved LLVM program should either validate that +// the input is UTF-8 or transcode from the local codepage to UTF-8 before +// quoting it. If they don't, this may mess up the encoding, but this is still +// probably the best compromise we can make. +static bool write_console_impl(int FD, StringRef Data) { + SmallVector WideText; + + // Fall back to ::write if it wasn't valid UTF-8. + if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText)) + return false; + + // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data + // that can be written to the console at a time. + size_t MaxWriteSize = WideText.size(); + if (!RunningWindows8OrGreater()) + MaxWriteSize = 32767; + + size_t WCharsWritten = 0; + do { + size_t WCharsToWrite = + std::min(MaxWriteSize, WideText.size() - WCharsWritten); + DWORD ActuallyWritten; + bool Success = + ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten], + WCharsToWrite, &ActuallyWritten, + /*Reserved=*/nullptr); + + // The most likely reason for WriteConsoleW to fail is that FD no longer + // points to a console. Fall back to ::write. If this isn't the first loop + // iteration, something is truly wrong. + if (!Success) + return false; + + WCharsWritten += ActuallyWritten; + } while (WCharsWritten != WideText.size()); + return true; +} +#endif + void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert(FD >= 0 && "File already closed."); pos += Size; +#if defined(_WIN32) + // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16 + // and using WriteConsoleW. If that fails, fall back to plain write(). + if (IsWindowsConsole) + if (write_console_impl(FD, StringRef(Ptr, Size))) + return; +#endif + // The maximum write size is limited to INT32_MAX. A write // greater than SSIZE_MAX is implementation-defined in POSIX, // and Windows _write requires 32 bit input. @@ -622,12 +687,6 @@ // It is observed that Linux returns EINVAL for a very large write (>2G). // Make it a reasonably small value. MaxWriteSize = 1024 * 1024 * 1024; -#elif defined(_WIN32) - // Writing a large size of output to Windows console returns ENOMEM. It seems - // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and - // the latter has a size limit (66000 bytes or less, depending on heap usage). - if (::_isatty(FD) && !RunningWindows8OrGreater()) - MaxWriteSize = 32767; #endif do { @@ -696,8 +755,17 @@ } size_t raw_fd_ostream::preferred_buffer_size() const { -#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) - // Windows and Minix have no st_blksize. +#if defined(_WIN32) + // Disable buffering for console devices. Console output is re-encoded from + // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the + // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled + // below on most other OSs, so do the same thing on Windows and avoid that + // complexity. + if (IsWindowsConsole) + return 0; + return raw_ostream::preferred_buffer_size(); +#elif !defined(__minix) + // Minix has no st_blksize. assert(FD >= 0 && "File not yet open!"); struct stat statbuf; if (fstat(FD, &statbuf) != 0)