Index: lib/Frontend/TextDiagnostic.cpp
===================================================================
--- lib/Frontend/TextDiagnostic.cpp
+++ lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Unicode.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -818,6 +819,36 @@
     if (DiagOpts->ShowColumn)
       // Compute the column number.
       if (unsigned ColNo = PLoc.getColumn()) {
+        // The presumed column is a 1-based count of bytes within the line.
+        // Convert it to a display column so that multi-byte UTF-8
+        // code-points before the location are not over-counted.
+        bool Invalid = false;
+        StringRef BufData = Loc.getBufferData(&Invalid);
+        if (!Invalid) {
+          const char *BufStart = BufData.data();
+          const char *BufEnd = BufStart + BufData.size();
+
+          // Locate the first byte of the line that contains the location.
+          std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+          FileID FID = LocInfo.first;
+          const SourceManager &SM = Loc.getManager();
+          const char *LineStart =
+              BufStart +
+              SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+          if (LineStart + ColNo < BufEnd) {
+            // Measure only the bytes that precede the location.  Including
+            // the byte at the location itself would split a multi-byte
+            // code-point that starts there and make the text invalid UTF-8.
+            StringRef LinePrefix(LineStart, ColNo - 1);
+            int PrefixWidth = llvm::sys::unicode::columnWidthUTF8(LinePrefix);
+            // columnWidthUTF8 signals failure with negative values
+            // (ErrorNonPrintableCharacter == -1, ErrorInvalidUTF8 == -2).
+            // Keep the byte-based column in either case.
+            if (PrefixWidth >= 0)
+              ColNo = unsigned(PrefixWidth) + 1;
+          }
+        }
+
         if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) {
           OS << ',';
           // Visual Studio 2010 or earlier expects column number to be off by one
Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,10 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+    "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "idez̈en" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:9:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}