This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
libcxx/include/
-
include/
-
vector

Differential D82111

Optimize 'construct at end' loops in vector
ClosedPublic

Authored by mvels on Jun 18 2020, 10:35 AM.

Download Raw Diff

Details

Reviewers

EricWF

Group Reviewers

Restricted Project

Commits

rGd96aac435423: Optimize 'construct at end' loops in vector
rG555106aa6935: Unreachable (removed)

Summary

This change adds local 'end' and 'pos' variables for the main loop instead of using the ConstructTransaction variables directly.

We observed that not all vector initialization and resize operations got properly vectorized, i.e., (partially) unrolled into XMM stores for floats.

For example, vector<int32_t> v(n, 1) gets vectorized, but vector<float> v(n, 1) does not. It looks like the compiler assumes the state is leaked / aliased in the latter case (unclear how/why for float, but not for int32), and because of this fails to see the vectorization opportunity.

See https://gcc.godbolt.org/z/UWhiie

By using a local __new_end_ (fixed), and local __pos (copied into tx.pos_ per iteration), we offer the compiler a clean loop for unrolling.

A demonstration can be seen in the isolated logic in https://gcc.godbolt.org/z/KoCNWv

The com

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

mvels created this revision.Jun 18 2020, 10:35 AM

Herald added a project: Restricted Project. · View Herald TranscriptJun 18 2020, 10:35 AM

Herald added a reviewer: Restricted Project. · View Herald Transcript

Herald added a subscriber: libcxx-commits. · View Herald Transcript

EricWF accepted this revision.Jun 18 2020, 10:36 AM

This revision is now accepted and ready to land.Jun 18 2020, 10:36 AM

Harbormaster failed remote builds in B60852: Diff 271771!Jun 18 2020, 11:27 AM

Closed by commit rGd96aac435423: Optimize 'construct at end' loops in vector (authored by mvels). · Explain WhyJun 18 2020, 11:27 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

libcxx/

include/

vector

15 lines

Diff 271771

libcxx/include/vector

	Show First 20 Lines • Show All 1,037 Lines • ▼ Show 20 Lines
	// Precondition: __n > 0			// Precondition: __n > 0
	// Precondition: size() + __n <= capacity()			// Precondition: size() + __n <= capacity()
	// Postcondition: size() == size() + __n			// Postcondition: size() == size() + __n
	template <class _Tp, class _Allocator>			template <class _Tp, class _Allocator>
	void			void
	vector<_Tp, _Allocator>::__construct_at_end(size_type __n)			vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
	{			{
	_ConstructTransaction __tx(*this, __n);			_ConstructTransaction __tx(*this, __n);
	for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) {			const_pointer __new_end = __tx.__new_end_;
	__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__tx.__pos_));			for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
				__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos));
	}			}
	}			}

	// Copy constructs __n objects starting at __end_ from __x			// Copy constructs __n objects starting at __end_ from __x
	// throws if construction throws			// throws if construction throws
	// Precondition: __n > 0			// Precondition: __n > 0
	// Precondition: size() + __n <= capacity()			// Precondition: size() + __n <= capacity()
	// Postcondition: size() == old size() + __n			// Postcondition: size() == old size() + __n
	// Postcondition: [i] == __x for all i in [size() - __n, __n)			// Postcondition: [i] == __x for all i in [size() - __n, __n)
	template <class _Tp, class _Allocator>			template <class _Tp, class _Allocator>
	inline			inline
	void			void
	vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)			vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
	{			{
	_ConstructTransaction __tx(*this, __n);			_ConstructTransaction __tx(*this, __n);
	for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) {			const_pointer __new_end = __tx.__new_end_;
	__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__tx.__pos_), __x);			for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
				__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos), __x);
	}			}
	}			}

	template <class _Tp, class _Allocator>			template <class _Tp, class _Allocator>
	template <class _ForwardIterator>			template <class _ForwardIterator>
	typename enable_if			typename enable_if
	<			<
	__is_cpp17_forward_iterator<_ForwardIterator>::value,			__is_cpp17_forward_iterator<_ForwardIterator>::value,
	▲ Show 20 Lines • Show All 675 Lines • ▼ Show 20 Lines
	void			void
	vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to)			vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to)
	{			{
	pointer __old_last = this->__end_;			pointer __old_last = this->__end_;
	difference_type __n = __old_last - __to;			difference_type __n = __old_last - __to;
	{			{
	pointer __i = __from_s + __n;			pointer __i = __from_s + __n;
	_ConstructTransaction __tx(*this, __from_e - __i);			_ConstructTransaction __tx(*this, __from_e - __i);
	for (; __i < __from_e; ++__i, ++__tx.__pos_) {			for (pointer __pos = __tx.__pos_; __i < __from_e;
				++__i, ++__pos, __tx.__pos_ = __pos) {
	__alloc_traits::construct(this->__alloc(),			__alloc_traits::construct(this->__alloc(),
	_VSTD::__to_address(__tx.__pos_),			_VSTD::__to_address(__pos),
	_VSTD::move(*__i));			_VSTD::move(*__i));
	}			}
	}			}
	_VSTD::move_backward(__from_s, __from_s + __n, __old_last);			_VSTD::move_backward(__from_s, __from_s + __n, __old_last);
	}			}

	template <class _Tp, class _Allocator>			template <class _Tp, class _Allocator>
	typename vector<_Tp, _Allocator>::iterator			typename vector<_Tp, _Allocator>::iterator
	▲ Show 20 Lines • Show All 1,648 Lines • Show Last 20 Lines