Diff 251105

clang/test/Analysis/checker-plugins.c

	Show First 20 Lines • Show All 110 Lines • ▼ Show 20 Lines

	// RUN: %clang_analyze_cc1 %s \			// RUN: %clang_analyze_cc1 %s \
	// RUN: -load %llvmshlibdir/CheckerOptionHandlingAnalyzerPlugin%pluginext\			// RUN: -load %llvmshlibdir/CheckerOptionHandlingAnalyzerPlugin%pluginext\
	// RUN: -analyzer-checker=example.MyChecker \			// RUN: -analyzer-checker=example.MyChecker \
	// RUN: -analyzer-checker-option-help \			// RUN: -analyzer-checker-option-help \
	// RUN: 2>&1 \| FileCheck %s -check-prefix=CHECK-CHECKER-OPTION-HELP			// RUN: 2>&1 \| FileCheck %s -check-prefix=CHECK-CHECKER-OPTION-HELP

	// CHECK-CHECKER-OPTION-HELP: example.MyChecker:ExampleOption (bool) This is an			// CHECK-CHECKER-OPTION-HELP: example.MyChecker:ExampleOption (bool) This is an
	// CHECK-CHECKER-OPTION-HELP-SAME: example checker opt. (default: false)			// CHECK-CHECKER-OPTION-HELP-SAME: example checker opt. (default:
				// CHECK-CHECKER-OPTION-HELP-NEXT: false)
				hoyFBUnsubmitted Done Reply Inline Actions LGTM, thanks for fixing this! hoyFB: LGTM, thanks for fixing this!

llvm/include/llvm/Support/FormattedStream.h

//===-- llvm/Support/FormattedStream.h - Formatted streams ------- C++ --===//		//===-- llvm/Support/FormattedStream.h - Formatted streams ------- C++ --===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file contains raw_ostream implementations for streams to do		// This file contains raw_ostream implementations for streams to do
// things like pretty-print comments.		// things like pretty-print comments.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_FORMATTEDSTREAM_H		#ifndef LLVM_SUPPORT_FORMATTEDSTREAM_H
#define LLVM_SUPPORT_FORMATTEDSTREAM_H		#define LLVM_SUPPORT_FORMATTEDSTREAM_H

		#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include <utility>		#include <utility>

namespace llvm {		namespace llvm {

/// formatted_raw_ostream - A raw_ostream that wraps another one and keeps track		/// formatted_raw_ostream - A raw_ostream that wraps another one and keeps track
/// of line and column position, allowing padding out to specific column		/// of line and column position, allowing padding out to specific column
/// boundaries and querying the number of lines written to the stream.		/// boundaries and querying the number of lines written to the stream.
///		///
		kristof.beylsUnsubmitted Done Reply Inline Actions I think it would be useful to add a description to this comment that: (a) This class assumes that a UTF-8 encoding is used on the Stream; and (b) "This doesn't attempt to handle everything unicode can do (combining characters, right-to-left markers, ...), but hopefully covers most things likely to be common in messages and source code we might want to print." kristof.beyls: I think it would be useful to add a description to this comment that: (a) This class assumes…
class formatted_raw_ostream : public raw_ostream {		class formatted_raw_ostream : public raw_ostream {
/// TheStream - The real stream we output to. We set it to be		/// TheStream - The real stream we output to. We set it to be
/// unbuffered, since we're already doing our own buffering.		/// unbuffered, since we're already doing our own buffering.
///		///
raw_ostream *TheStream;		raw_ostream *TheStream;

/// Position - The current output column and line of the data that's		/// Position - The current output column and line of the data that's
/// been flushed and the portion of the buffer that's been		/// been flushed and the portion of the buffer that's been
/// scanned. The line and column scheme is zero-based.		/// scanned. The line and column scheme is zero-based.
///		///
std::pair<unsigned, unsigned> Position;		std::pair<unsigned, unsigned> Position;

/// Scanned - This points to one past the last character in the		/// Scanned - This points to one past the last character in the
/// buffer we've scanned.		/// buffer we've scanned.
///		///
const char *Scanned;		const char *Scanned;

		/// PartialUTF8Char - Either empty or a prefix of a UTF-8 code unit sequence
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/UTF-8 character/UTF-8 code unit sequence for a Unicode scalar value/; hubert.reinterpretcast: s/UTF-8 character/UTF-8 code unit sequence for a Unicode scalar value/;
		/// for a Unicode scalar value which should be prepended to the buffer for the
		/// next call to ComputePosition. This is needed when the buffer is flushed
		/// when it ends part-way through the UTF-8 encoding of a Unicode scalar
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/a UTF-8 character/the UTF-8 encoding of a Unicode scalar value/; hubert.reinterpretcast: s/a UTF-8 character/the UTF-8 encoding of a Unicode scalar value/;
		/// value, so that we can compute the display width of the character once we
		/// have the rest of it.
		benlangmuirUnsubmitted Done Reply Inline Actions The changes related to `PartialUTF8Char` LGTM, thanks! benlangmuir: The changes related to `PartialUTF8Char` LGTM, thanks!
		SmallString<4> PartialUTF8Char;

void write_impl(const char *Ptr, size_t Size) override;		void write_impl(const char *Ptr, size_t Size) override;

/// current_pos - Return the current position within the stream,		/// current_pos - Return the current position within the stream,
/// not counting the bytes currently in the buffer.		/// not counting the bytes currently in the buffer.
uint64_t current_pos() const override {		uint64_t current_pos() const override {
// Our current position in the stream is all the contents which have been		// Our current position in the stream is all the contents which have been
// written to the underlying stream (not the current position of the		// written to the underlying stream (not the current position of the
// underlying stream).		// underlying stream).
return TheStream->tell();		return TheStream->tell();
}		}

/// ComputePosition - Examine the given output buffer and figure out the new		/// ComputePosition - Examine the given output buffer and figure out the new
/// position after output.		/// position after output. This is safe to call multiple times on the same
///		/// buffer, as it records the most recently scanned character and resumes from
		/// there when the buffer has not been flushed.
void ComputePosition(const char *Ptr, size_t size);		void ComputePosition(const char *Ptr, size_t size);

		/// UpdatePosition - scan the characters in [Ptr, Ptr+Size), and update the
		/// line and column numbers. Unlike ComputePosition, this must be called
		/// exactly once on each region of the buffer.
		void UpdatePosition(const char *Ptr, size_t Size);

void setStream(raw_ostream &Stream) {		void setStream(raw_ostream &Stream) {
releaseStream();		releaseStream();

TheStream = &Stream;		TheStream = &Stream;

// This formatted_raw_ostream inherits from raw_ostream, so it'll do its		// This formatted_raw_ostream inherits from raw_ostream, so it'll do its
// own buffering, and it doesn't need or want TheStream to do another		// own buffering, and it doesn't need or want TheStream to do another
// layer of buffering underneath. Resize the buffer to what TheStream		// layer of buffering underneath. Resize the buffer to what TheStream
Show All 33 Lines	public:

/// PadToColumn - Align the output to some column number. If the current		/// PadToColumn - Align the output to some column number. If the current
/// column is already equal to or more than NewCol, PadToColumn inserts one		/// column is already equal to or more than NewCol, PadToColumn inserts one
/// space.		/// space.
///		///
/// \param NewCol - The column to move to.		/// \param NewCol - The column to move to.
formatted_raw_ostream &PadToColumn(unsigned NewCol);		formatted_raw_ostream &PadToColumn(unsigned NewCol);

/// getColumn - Return the column number		unsigned getColumn() {
unsigned getColumn() { return Position.first; }		// Calculate current position, taking buffer contents into account.
		ComputePosition(getBufferStart(), GetNumBytesInBuffer());
		return Position.first;
		}

/// getLine - Return the line number		unsigned getLine() {
unsigned getLine() { return Position.second; }		// Calculate current position, taking buffer contents into account.
		ComputePosition(getBufferStart(), GetNumBytesInBuffer());
		return Position.second;
		}

raw_ostream &resetColor() override {		raw_ostream &resetColor() override {
TheStream->resetColor();		TheStream->resetColor();
return *this;		return *this;
}		}

raw_ostream &reverseColor() override {		raw_ostream &reverseColor() override {
TheStream->reverseColor();		TheStream->reverseColor();
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

llvm/lib/Support/FormattedStream.cpp

	//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----- C++ --===//			//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This file contains the implementation of formatted_raw_ostream.			// This file contains the implementation of formatted_raw_ostream.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "llvm/Support/FormattedStream.h"			#include "llvm/Support/FormattedStream.h"
				#include "llvm/Support/ConvertUTF.h"
	#include "llvm/Support/Debug.h"			#include "llvm/Support/Debug.h"
				#include "llvm/Support/Locale.h"
	#include "llvm/Support/raw_ostream.h"			#include "llvm/Support/raw_ostream.h"
	#include <algorithm>			#include <algorithm>

	using namespace llvm;			using namespace llvm;

	/// UpdatePosition - Examine the given char sequence and figure out which			/// UpdatePosition - Examine the given char sequence and figure out which
	/// column we end up in after output, and how many line breaks are contained.			/// column we end up in after output, and how many line breaks are contained.
	///			/// This assumes that the input string is well-formed UTF-8, and takes into
	static void UpdatePosition(std::pair<unsigned, unsigned> &Position, const char *Ptr, size_t Size) {			/// account Unicode characters which render as multiple columns wide.
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/unicode/Unicode/; hubert.reinterpretcast: s/unicode/Unicode/;
				void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
	unsigned &Column = Position.first;			unsigned &Column = Position.first;
	unsigned &Line = Position.second;			unsigned &Line = Position.second;

	// Keep track of the current column and line by scanning the string for			auto ProcessCodePoint = [&Line, &Column](StringRef CP) {
				kristof.beylsUnsubmitted Done Reply Inline Actions <bikeshedding mode> Given that ProcessCodePoint assumes that the Unicode code point is represented in the UTF-8 encoding, maybe it would be slightly better to name the lambda as ProcessUTF8CodePoint rather than ProcessCodePoint? </bikeshedding mode> kristof.beyls: <bikeshedding mode> Given that ProcessCodePoint assumes that the Unicode code point is represented…
	// special characters			int Width = sys::locale::columnWidth(CP);
				kristof.beylsUnsubmitted Done Reply Inline Actions I'm wondering if using sys::unicode::columnWidthUTF8 instead of sys::locale::columnWidth would be more future-proof and more clearly describe the intent that this function only processes UTF-8 and not strings encoded in other encodings? kristof.beyls: I'm wondering if using sys::unicode::columnWidthUTF8 instead of sys::locale::columnWidth would…
	for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {			// columnWidth returns -1 for non-printing characters.
	++Column;			if (Width != -1)
				kristof.beylsUnsubmitted Done Reply Inline Actions The documentation for sys::unicode::columnWidthUTF8 documents it returns ErrorNonPrintableCharacter (-1) if the text contains non-printable characters. Maybe it's more self-documenting to compare against ErrorNonPrintableCharacter rather than -1 in the above if condition? kristof.beyls: The documentation for sys::unicode::columnWidthUTF8 documents it returns…
	switch (*Ptr) {			Column += Width;

				// If this is the final byte of a multi-byte sequence, it can't be any of
				// the special whitespace characters below.
				kristof.beylsUnsubmitted Done Reply Inline Actions Reading through the code linearly from the top to the bottom, I'm a bit surprised by this comment. I would expect CP to contain exactly the bytes that when interpreted as a UTF-8 encoded Unicode character, form exactly one Unicode character. Therefore, I'm not sure how to interpret "If this is the final byte of a multi-byte sequence.". I'm expecting "this" to refer to "CP" in this context. But that cannot be "just" the final byte of a multi-byte sequence, unless my assumption that CP contains exactly the bytes forming a single UTF-8 encoded Unicode character is wrong. Could CP contain a partial UTF-8 encoded character? If so, maybe it'd be better to change the name ProcessCodePoint to something that suggests that could be possible? kristof.beyls: Reading through the code linearly from the top to the bottom, I'm a bit surprised by this…
				if (CP.size() > 1)
				return;

				switch (CP[0]) {
	case '\n':			case '\n':
	Line += 1;			Line += 1;
	LLVM_FALLTHROUGH;			LLVM_FALLTHROUGH;
	case '\r':			case '\r':
	Column = 0;			Column = 0;
	break;			break;
	case '\t':			case '\t':
	// Assumes tab stop = 8 characters.			// Assumes tab stop = 8 characters.
	Column += (8 - (Column & 0x7)) & 0x7;			Column += (8 - (Column & 0x7)) & 0x7;
	break;			break;
	}			}
				};

				// If we have a partial UTF-8 sequence from the previous buffer, check that
				// first.
				if (PartialUTF8Char.size()) {
				size_t BytesFromBuffer =
				getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions The overflow-free version should be preferred: `Size < NumBytes - PartialUTF8Char.size()` Considering that `NumBytes - PartialUTF8Char.size()` is already used in the `else`, this might as well be named `NumBytesNeededFromBuffer` first (at which point it would be the only use of `NumBytes`, so we can get rid of `NumBytes`): size_t NumBytesNeededFromBuffer = getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size(); if (Size < NumBytesNeededFromBuffer) { hubert.reinterpretcast: The overflow-free version should be preferred: `Size < NumBytes - PartialUTF8Char.size()`…
				if (Size < BytesFromBuffer) {
				// If we still don't have enough bytes for a complete code point, just
				// append what we have.
				PartialUTF8Char.append(StringRef(Ptr, Size));
				return;
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/code-point/code point/; hubert.reinterpretcast: s/code-point/code point/;
				} else {
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions Typo: s/ane/and/; hubert.reinterpretcast: Typo: s/ane/and/;
				// The first few bytes from the buffer will complete the code point.
				// Concatenate them and process their effect on the line and column
				// numbers.
				PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
				ProcessCodePoint(PartialUTF8Char);
				PartialUTF8Char.clear();
				Ptr += BytesFromBuffer;
				Size -= BytesFromBuffer;
				}
				}

				// Now scan the rest of the buffer.
				unsigned NumBytes;
				for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
				NumBytes = getNumBytesForUTF8(*Ptr);

				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/UTF-8 code point/UTF-8 code unit sequence for a Unicode scalar value/; hubert.reinterpretcast: s/UTF-8 code point/UTF-8 code unit sequence for a Unicode scalar value/;
				// The buffer might end part way through a UTF-8 code unit sequence for a
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions Comma after "happens". hubert.reinterpretcast: Comma after "happens".
				// Unicode scalar value if it got flushed. If this happens, we can't know
				// the display width until we see the rest of the code point. Stash the
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/bsing/being/; hubert.reinterpretcast: s/bsing/being/;
				// bytes we do have, so that we can reconstruct the whole code point later,
				// even if the buffer is being flushed.
				if ((End - Ptr) < NumBytes) {
				hubert.reinterpretcastUnsubmitted Done Reply Inline Actions Prefer the overflow-free version: `End - Ptr < NumBytes`. If inclined to name `End - Ptr` (it would occur twice), `BytesAvailable` makes sense. hubert.reinterpretcast: Prefer the overflow-free version: `End - Ptr < NumBytes`. If inclined to name `End - Ptr` (it…
				PartialUTF8Char = StringRef(Ptr, End - Ptr);
				return;
				}

				ProcessCodePoint(StringRef(Ptr, NumBytes));
	}			}
	}			}

	/// ComputePosition - Examine the current output and update line and column			/// ComputePosition - Examine the current output and update line and column
	/// counts.			/// counts.
	void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {			void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
	// If our previous scan pointer is inside the buffer, assume we already			// If our previous scan pointer is inside the buffer, assume we already
	// scanned those bytes. This depends on raw_ostream to not change our buffer			// scanned those bytes. This depends on raw_ostream to not change our buffer
	// in unexpected ways.			// in unexpected ways.
	if (Ptr <= Scanned && Scanned <= Ptr + Size)			if (Ptr <= Scanned && Scanned <= Ptr + Size)
	// Scan all characters added since our last scan to determine the new			// Scan all characters added since our last scan to determine the new
	// column.			// column.
	UpdatePosition(Position, Scanned, Size - (Scanned - Ptr));			UpdatePosition(Scanned, Size - (Scanned - Ptr));
	else			else
	UpdatePosition(Position, Ptr, Size);			UpdatePosition(Ptr, Size);

	// Update the scanning pointer.			// Update the scanning pointer.
	Scanned = Ptr + Size;			Scanned = Ptr + Size;
	}			}

	/// PadToColumn - Align the output to some column number.			/// PadToColumn - Align the output to some column number.
	///			///
	/// \param NewCol - The column to move to.			/// \param NewCol - The column to move to.
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/test/MC/ARM/lsl-zero.s

	// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s			// RUN: llvm-mc -triple=thumbv7 -show-encoding < %s 2>/dev/null \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV7 %s
	// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s			// RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>/dev/null \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONARM --check-prefix=CHECK-THUMBV8 %s
	// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>&1 \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s			// RUN: llvm-mc -triple=armv7 -show-encoding < %s 2>/dev/null \| FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM %s

	// lsl #0 is actually mov, so here we check that it behaves the same as			// lsl #0 is actually mov, so here we check that it behaves the same as
	// mov with regards to the permitted registers and how it behaves in an			// mov with regards to the permitted registers and how it behaves in an
	// IT block.			// IT block.

	// Non-flags-setting with only one of source and destination SP should			// Non-flags-setting with only one of source and destination SP should
	// be OK			// be OK
	lsl sp, r0, #0			lsl sp, r0, #0
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/unittests/Support/formatted_raw_ostream_test.cpp

Show All 23 Lines	TEST(formatted_raw_ostreamTest, Test_Tell) {

for (unsigned i = 0; i != 3; ++i) {		for (unsigned i = 0; i != 3; ++i) {
C.write(tmp, 100);		C.write(tmp, 100);

EXPECT_EQ(100*(i+1), (unsigned) C.tell());		EXPECT_EQ(100*(i+1), (unsigned) C.tell());
}		}
}		}

		TEST(formatted_raw_ostreamTest, Test_LineColumn) {
		// Test tracking of line and column numbers in a stream.
		SmallString<128> A;
		raw_svector_ostream B(A);
		formatted_raw_ostream C(B);

		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(0U, C.getColumn());

		C << "a";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(1U, C.getColumn());

		C << "bcdef";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(6U, C.getColumn());

		// '\n' increments line number, sets column to zero.
		C << "\n";
		EXPECT_EQ(1U, C.getLine());
		EXPECT_EQ(0U, C.getColumn());

		// '\r' sets column to zero without changing line number.
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/coulmn/column/; hubert.reinterpretcast: s/coulmn/column/;
		C << "foo\r";
		EXPECT_EQ(1U, C.getLine());
		EXPECT_EQ(0U, C.getColumn());

		// '\t' advances column to the next multiple of 8.
		// FIXME: If the column number is already a multiple of 8 this will do
		// nothing, is this behaviour correct?
		C << "1\t";
		EXPECT_EQ(8U, C.getColumn());
		C << "\t";
		EXPECT_EQ(8U, C.getColumn());
		C << "1234567\t";
		EXPECT_EQ(16U, C.getColumn());
		EXPECT_EQ(1U, C.getLine());
		}

		TEST(formatted_raw_ostreamTest, Test_Flush) {
		// Flushing the buffer causes the characters in the buffer to be scanned
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/charcters/characters/; hubert.reinterpretcast: s/charcters/characters/;
		// before the buffer is emptied, so line and column numbers will still be
		// tracked properly.
		SmallString<128> A;
		raw_svector_ostream B(A);
		B.SetBufferSize(32);
		formatted_raw_ostream C(B);

		C << "\nabc";
		EXPECT_EQ(4U, C.GetNumBytesInBuffer());
		C.flush();
		EXPECT_EQ(1U, C.getLine());
		EXPECT_EQ(3U, C.getColumn());
		EXPECT_EQ(0U, C.GetNumBytesInBuffer());
		}

		TEST(formatted_raw_ostreamTest, Test_UTF8) {
		hubert.reinterpretcastUnsubmitted Not Done Reply Inline Actions Should there be a test for combining characters? hubert.reinterpretcast: Should there be a test for combining characters?
		ostannardAuthorUnsubmitted Done Reply Inline Actions This doesn't support combining characters, I don't think there's much point in adding a test for something which doesn't work. ostannard: This doesn't support combining characters, I don't think there's much point in adding a test…
		hubert.reinterpretcastUnsubmitted Not Done Reply Inline Actions Got it; thanks. hubert.reinterpretcast: Got it; thanks.
		SmallString<128> A;
		raw_svector_ostream B(A);
		B.SetBufferSize(32);
		formatted_raw_ostream C(B);

		// U+00A0 Non-breaking space: encoded as two bytes, but only one column wide.
		C << u8"\u00a0";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(1U, C.getColumn());
		EXPECT_EQ(2U, C.GetNumBytesInBuffer());

		// U+2468 CIRCLED DIGIT NINE: encoded as three bytes, but only one column
		// wide.
		C << u8"\u2468";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(2U, C.getColumn());
		EXPECT_EQ(5U, C.GetNumBytesInBuffer());

		// U+00010000 LINEAR B SYLLABLE B008 A: encoded as four bytes, but only one
		// column wide.
		C << u8"\U00010000";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(3U, C.getColumn());
		EXPECT_EQ(9U, C.GetNumBytesInBuffer());

		// U+55B5, CJK character, encodes as three bytes, takes up two columns.
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions s/chinese/Chinese/; or CJK. hubert.reinterpretcast: s/chinese/Chinese/; or CJK.
		C << u8"\u55b5";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(5U, C.getColumn());
		EXPECT_EQ(12U, C.GetNumBytesInBuffer());

		// U+200B, zero-width space, encoded as three bytes but has no effect on the
		// column or line number.
		C << u8"\u200b";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(5U, C.getColumn());
		EXPECT_EQ(15U, C.GetNumBytesInBuffer());
		}

		TEST(formatted_raw_ostreamTest, Test_UTF8Buffered) {
		SmallString<128> A;
		raw_svector_ostream B(A);
		B.SetBufferSize(4);
		formatted_raw_ostream C(B);

		// This character encodes as three bytes, so will cause the buffer to be
		kristof.beylsUnsubmitted Not Done Reply Inline Actions I guess "This" refers to \u2468? If so, it'd be easier to read this comment if it was written like: "// \u2468 encodes as three bytes, ..." kristof.beyls: I guess "This" refers to \u2468? If so, it'd be easier to read this comment if it was written…
		// flushed after the first byte (4 byte buffer, 3 bytes already written). We
		// need to save the first part of the UTF-8 encoding until after the buffer is
		// cleared and the remaining two bytes are written, at which point we can
		// check the display width. In this case the display width is 1, so we end at
		// column 4, with 6 bytes written in total, 2 of which are in the buffer.
		C << u8"123\u2468";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(4U, C.getColumn());
		EXPECT_EQ(2U, C.GetNumBytesInBuffer());
		C.flush();
		EXPECT_EQ(6U, A.size());

		// Same as above, but with a CJK character which displays as two columns.
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions Same comment re: CJK. hubert.reinterpretcast: Same comment re: CJK.
		C << u8"123\u55b5";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(9U, C.getColumn());
		EXPECT_EQ(2U, C.GetNumBytesInBuffer());
		C.flush();
		EXPECT_EQ(12U, A.size());
		}

		TEST(formatted_raw_ostreamTest, Test_UTF8TinyBuffer) {
		SmallString<128> A;
		raw_svector_ostream B(A);
		B.SetBufferSize(1);
		formatted_raw_ostream C(B);

		// The stream has a one-byte buffer, so it gets flushed multiple times while
		// printing a single Unicode character.
		hubert.reinterpretcastUnsubmitted Done Reply Inline Actions Same comment re: "Unicode". hubert.reinterpretcast: Same comment re: "Unicode".
		C << u8"\u2468";
		EXPECT_EQ(0U, C.getLine());
		EXPECT_EQ(1U, C.getColumn());
		EXPECT_EQ(0U, C.GetNumBytesInBuffer());
		C.flush();
		EXPECT_EQ(3U, A.size());
		}
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[Support] Fix formatted_raw_ostream for UTF-8
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251105

clang/test/Analysis/checker-plugins.c

llvm/include/llvm/Support/FormattedStream.h

llvm/lib/Support/FormattedStream.cpp

llvm/test/MC/ARM/lsl-zero.s

llvm/unittests/Support/formatted_raw_ostream_test.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[Support] Fix formatted_raw_ostream for UTF-8
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251105

clang/test/Analysis/checker-plugins.c

llvm/include/llvm/Support/FormattedStream.h

llvm/lib/Support/FormattedStream.cpp

llvm/test/MC/ARM/lsl-zero.s

llvm/unittests/Support/formatted_raw_ostream_test.cpp

[Support] Fix formatted_raw_ostream for UTF-8
ClosedPublic