This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
bindings/python/
-
python/
-
clang/
-
cindex.py
-
tests/cindex/
-
cindex/
-
test_translation_unit.py

Differential D45741

Python bindings: Fix handling of file bodies with multi-byte characters
Abandoned · Public

Authored by mheinzler on Apr 17 2018, 3:10 PM.

Download Raw Diff

Details

Reviewers

kristina
serge-sans-paille

Group Reviewers

Restricted Project

Summary

With Python 3 there is a difference between the length of a string and the length of its UTF-8 encoded byte array. To avoid cutting off characters at the end when the string contains multi-byte characters, the length of the file contents that get passed to clang needs to be calculated from their bytes representation.

I also added a test case that catches this. I needed to add the coding line at the top of the test unit to make Python 2 work with the embedded Unicode character. Alternatively, we could replace the character with \uXXXX, but then there would be other problems with Python 2.

Diff Detail

Repository: rC Clang

Event Timeline

mheinzler created this revision. · Apr 17 2018, 3:10 PM

Herald added a subscriber: cfe-commits. · View Herald Transcript · Apr 17 2018, 3:10 PM

Would you mind re-uploading these patches with full context (with diff -U99999)? Three lines of context around changes make this very difficult to review. Also, I would suggest testing for the Python version and using the appropriate semantics (using encode in the case of Python 3). I'm not entirely sure whether the bindings are supposed to be compatible with Python 3, but I don't see any harm. However, explicit tests that avoid changing the functionality of existing Python 2.7 code would be better in my opinion.

kristina added a reviewer: kristina. · Sep 16 2018, 12:12 PM

Sorry, here's the diff for the whole files.

The b function defined at the top of the file already does what you suggest. For python2 it returns the string unchanged, for python3 it calls encode. So there shouldn't be any change at all for python2.

I'm closing this because it has been fixed in master by:
https://reviews.llvm.org/D56429

Herald added a reviewer: serge-sans-paille. · View Herald Transcript · Feb 3 2019, 3:08 AM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added a subscriber: arphaman. · View Herald Transcript

Revision Contents

Path

Size

bindings/

python/

clang/

12 lines

tests/

cindex/

test_translation_unit.py

12 lines

Diff 165728

bindings/python/clang/cindex.py

	Show First 20 Lines • Show All 2,797 Lines • ▼ Show 20 Lines		def from_source(cls, filename, args=None, unsaved_files=None, options=0,
	if len(unsaved_files) > 0:			if len(unsaved_files) > 0:
	unsaved_array = (_CXUnsavedFile * len(unsaved_files))()			unsaved_array = (_CXUnsavedFile * len(unsaved_files))()
	for i, (name, contents) in enumerate(unsaved_files):			for i, (name, contents) in enumerate(unsaved_files):
	if hasattr(contents, "read"):			if hasattr(contents, "read"):
	contents = contents.read()			contents = contents.read()

	unsaved_array[i].name = b(name)			unsaved_array[i].name = b(name)
	unsaved_array[i].contents = b(contents)			unsaved_array[i].contents = b(contents)
	unsaved_array[i].length = len(contents)			unsaved_array[i].length = len(unsaved_array[i].contents)

	ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,			ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array,
	len(args), unsaved_array,			len(args), unsaved_array,
	len(unsaved_files), options)			len(unsaved_files), options)

	if not ptr:			if not ptr:
	raise TranslationUnitLoadError("Error parsing translation unit.")			raise TranslationUnitLoadError("Error parsing translation unit.")

	▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines		def reparse(self, unsaved_files=None, options=0):
	for i,(name,value) in enumerate(unsaved_files):			for i,(name,value) in enumerate(unsaved_files):
	if not isinstance(value, str):			if not isinstance(value, str):
	# FIXME: It would be great to support an efficient version			# FIXME: It would be great to support an efficient version
	# of this, one day.			# of this, one day.
	value = value.read()			value = value.read()
	print(value)			print(value)
	if not isinstance(value, str):			if not isinstance(value, str):
	raise TypeError('Unexpected unsaved file contents.')			raise TypeError('Unexpected unsaved file contents.')
	unsaved_files_array[i].name = name			unsaved_files_array[i].name = b(name)
	unsaved_files_array[i].contents = value			unsaved_files_array[i].contents = b(value)
	unsaved_files_array[i].length = len(value)			unsaved_files_array[i].length = \
				len(unsaved_files_array[i].contents)
	ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files),			ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files),
	unsaved_files_array, options)			unsaved_files_array, options)

	def save(self, filename):			def save(self, filename):
	"""Saves the TranslationUnit to a file.			"""Saves the TranslationUnit to a file.

	This is equivalent to passing -emit-ast to the clang frontend. The			This is equivalent to passing -emit-ast to the clang frontend. The
	saved file can be loaded back into a TranslationUnit. Or, if it			saved file can be loaded back into a TranslationUnit. Or, if it
	▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines		def codeComplete(self, path, line, column, unsaved_files=None,
	# FIXME: It would be great to support an efficient version			# FIXME: It would be great to support an efficient version
	# of this, one day.			# of this, one day.
	value = value.read()			value = value.read()
	print(value)			print(value)
	if not isinstance(value, str):			if not isinstance(value, str):
	raise TypeError('Unexpected unsaved file contents.')			raise TypeError('Unexpected unsaved file contents.')
	unsaved_files_array[i].name = b(name)			unsaved_files_array[i].name = b(name)
	unsaved_files_array[i].contents = b(value)			unsaved_files_array[i].contents = b(value)
	unsaved_files_array[i].length = len(value)			unsaved_files_array[i].length = \
				len(unsaved_files_array[i].contents)
	ptr = conf.lib.clang_codeCompleteAt(self, path, line, column,			ptr = conf.lib.clang_codeCompleteAt(self, path, line, column,
	unsaved_files_array, len(unsaved_files), options)			unsaved_files_array, len(unsaved_files), options)
	if ptr:			if ptr:
	return CodeCompletionResults(ptr)			return CodeCompletionResults(ptr)
	return None			return None

	def get_tokens(self, locations=None, extent=None):			def get_tokens(self, locations=None, extent=None):
	"""Obtain tokens in this translation unit.			"""Obtain tokens in this translation unit.
	▲ Show 20 Lines • Show All 1,143 Lines • Show Last 20 Lines

bindings/python/tests/cindex/test_translation_unit.py

				# -- coding: utf-8 --

	from contextlib import contextmanager			from contextlib import contextmanager
	import gc			import gc
	import os			import os
	import tempfile			import tempfile
	import unittest			import unittest

	from clang.cindex import CursorKind			from clang.cindex import CursorKind
	from clang.cindex import Cursor			from clang.cindex import Cursor
	▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines		def test_unsaved_files_2(self):
	from StringIO import StringIO			from StringIO import StringIO
	except:			except:
	from io import StringIO			from io import StringIO
	tu = TranslationUnit.from_source('fake.c', unsaved_files = [			tu = TranslationUnit.from_source('fake.c', unsaved_files = [
	('fake.c', StringIO('int x;'))])			('fake.c', StringIO('int x;'))])
	spellings = [c.spelling for c in tu.cursor.get_children()]			spellings = [c.spelling for c in tu.cursor.get_children()]
	self.assertEqual(spellings[-1], 'x')			self.assertEqual(spellings[-1], 'x')

				def test_unsaved_files_encoding(self):
				tu = TranslationUnit.from_source('fake.c', ['-I./'], unsaved_files = [
				('fake.c', """
				// 😀
				int x;
				""")
				])
				spellings = [c.spelling for c in tu.cursor.get_children()]
				self.assertEqual(spellings[-1], 'x')

	def assert_normpaths_equal(self, path1, path2):			def assert_normpaths_equal(self, path1, path2):
	""" Compares two paths for equality after normalizing them with			""" Compares two paths for equality after normalizing them with
	os.path.normpath			os.path.normpath
	"""			"""
	self.assertEqual(os.path.normpath(path1),			self.assertEqual(os.path.normpath(path1),
	os.path.normpath(path2))			os.path.normpath(path2))

	def test_includes(self):			def test_includes(self):
	▲ Show 20 Lines • Show All 170 Lines • Show Last 20 Lines