This is an archive of the discontinued LLVM Phabricator instance.

@avieira I've rearranged the conditions so it's easier to understand (no else statements). I think the generated code is the same. Can you confirm that it performs the same?

libc/src/string/aarch64/memcmp.cpp
43	@avieira shouldn't this be `Skip<64>` instead of `Skip<32>` here?

Harbormaster completed remote builds in B115807: Diff 361139.Jul 23 2021, 3:02 AM

avieira added inline comments.Jul 23 2021, 3:39 AM

libc/src/string/aarch64/memcmp.cpp
43	No, I think 32 is right: it only goes through two _16 Equals (on lines 34 and 38) before reaching this. However, since the size is guaranteed to be >= 64, we could do something like Skip<32>::Then<Chained<_32, Loop<_16>>> (untested). I'll benchmark it and let you know.

Fix build

Just updated the patch; it was not building...

Harbormaster completed remote builds in B115825: Diff 361160.Jul 23 2021, 5:24 AM

LGTM

libc/src/string/aarch64/memcmp.cpp
43	Sorry it took so long, this kind of fell through the cracks. But yes, use this patch as is; don't make it Skip<64>. I benchmarked the Chained<_32, Loop<_16>> approach and it led to slightly worse codegen. FYI, I didn't use Chained, as that requires the Tail to be a constant-size operation, so I had locally created a new element to do a sequence, almost like a 'ChainEnd' where the tail can be variable-size and you can't chain it further.

rebase

Thanks for the review. I'll let the build bot run first and then submit this patch.

This revision is now accepted and ready to land.Jul 29 2021, 5:48 AM

Harbormaster completed remote builds in B116948: Diff 362736.Jul 29 2021, 6:34 AM

Closed by commit rGcd2f5d5b496d: [libc] rewrite aarch64 memcmp implementation (authored by gchatelet). · Explain WhyJul 29 2021, 7:41 AM

This revision was automatically updated to reflect the committed changes.

gchatelet added a commit: rGcd2f5d5b496d: [libc] rewrite aarch64 memcmp implementation.

Revision Contents

Path

Size

libc/

src/

string/

aarch64/

memcmp.cpp

53 lines

memory_utils/

elements.h

40 lines

Diff 362774

libc/src/string/aarch64/memcmp.cpp

	//===-- Implementation of memcmp ------------------------------------------===//			//===-- Implementation of memcmp ------------------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "src/string/memcmp.h"			#include "src/string/memcmp.h"
	#include "src/__support/common.h"			#include "src/__support/common.h"
	#include "src/string/memory_utils/elements.h"			#include "src/string/memory_utils/elements.h"
	#include <stddef.h> // size_t			#include <stddef.h> // size_t

	namespace __llvm_libc {			namespace __llvm_libc {
	namespace aarch64 {

	static int memcmp_impl(const char lhs, const char rhs, size_t count) {			static int memcmp_aarch64(const char lhs, const char rhs, size_t count) {
	if (count == 0)			// Use aarch64 strategies (_1, _2, _3 ...)
				using namespace __llvm_libc::aarch64;

				if (count == 0) // [0, 0]
	return 0;			return 0;
	if (count == 1)			if (count == 1) // [1, 1]
	return ThreeWayCompare<_1>(lhs, rhs);			return ThreeWayCompare<_1>(lhs, rhs);
	else if (count == 2)			if (count == 2) // [2, 2]
	return ThreeWayCompare<_2>(lhs, rhs);			return ThreeWayCompare<_2>(lhs, rhs);
	else if (count == 3)			if (count == 3) // [3, 3]
	return ThreeWayCompare<_3>(lhs, rhs);			return ThreeWayCompare<_3>(lhs, rhs);
	else if (count < 8)			if (count < 8) // [4, 7]
	return ThreeWayCompare<HeadTail<_4>>(lhs, rhs, count);			return ThreeWayCompare<HeadTail<_4>>(lhs, rhs, count);
	else if (count < 16)			if (count < 16) // [8, 15]
	return ThreeWayCompare<HeadTail<_8>>(lhs, rhs, count);			return ThreeWayCompare<HeadTail<_8>>(lhs, rhs, count);
	else if (count < 128) {			if (unlikely(count >= 128)) // [128, ∞]
	if (Equals<_16>(lhs, rhs)) {			return ThreeWayCompare<Align<_16>::Then<Loop<_32>>>(lhs, rhs, count);
	if (count < 32)			if (!Equals<_16>(lhs, rhs)) // [16, 16]
				return ThreeWayCompare<_16>(lhs, rhs);
				if (count < 32) // [17, 31]
	return ThreeWayCompare<Tail<_16>>(lhs, rhs, count);			return ThreeWayCompare<Tail<_16>>(lhs, rhs, count);
	else {			if (!Equals<Skip<16>::Then<_16>>(lhs, rhs)) // [32, 32]
	if (Equals<_16>(lhs + 16, rhs + 16)) {			return ThreeWayCompare<Skip<16>::Then<_16>>(lhs, rhs);
	if (count < 64)			if (count < 64) // [33, 63]
	return ThreeWayCompare<Tail<_32>>(lhs, rhs, count);			return ThreeWayCompare<Tail<_32>>(lhs, rhs, count);
	if (count < 128)			// [64, 127]
	return ThreeWayCompare<Loop<_16>>(lhs + 32, rhs + 32, count - 32);			return ThreeWayCompare<Skip<32>::Then<Loop<_16>>>(lhs, rhs, count);
				gchateletAuthorUnsubmitted Done Reply Inline Actions @avieira shouldn't this be `Skip<64>` instead of `Skip<32>` here? gchatelet: @avieira shouldn't this be `Skip<64>` instead of `Skip<32>` here?
				avieiraUnsubmitted Not Done Reply Inline Actions No, I think 32 is right: it only goes through two _16 Equals (on lines 34 and 38) before reaching this. However, since the size is guaranteed to be >= 64, we could do something like Skip<32>::Then<Chained<_32, Loop<_16>>> (untested). I'll benchmark it and let you know. avieira: No, I think 32 is right: it only goes through two _16 Equals (on lines 34…
				avieiraUnsubmitted Done Reply Inline Actions Sorry it took so long, this kind of fell through the cracks. But yes, use this patch as is; don't make it Skip<64>. I benchmarked the Chained<_32, Loop<_16>> approach and it led to slightly worse codegen. FYI, I didn't use Chained, as that requires the Tail to be a constant-size operation, so I had locally created a new element to do a sequence, almost like a 'ChainEnd' where the tail can be variable-size and you can't chain it further. avieira: Sorry it took so long, this kind of fell through the cracks. But yes, use this patch as is; don't make…
	} else
	return ThreeWayCompare<_16>(lhs + count - 32, rhs + count - 32);
	}
	}			}
	return ThreeWayCompare<_16>(lhs, rhs);
	} else
	return ThreeWayCompare<Align<_16, Arg::Lhs>::Then<Loop<_32>>>(lhs, rhs,
	count);
	}
	} // namespace aarch64

	LLVM_LIBC_FUNCTION(int, memcmp,			LLVM_LIBC_FUNCTION(int, memcmp,
	(const void lhs, const void rhs, size_t count)) {			(const void lhs, const void rhs, size_t count)) {
				return memcmp_aarch64(reinterpret_cast<const char *>(lhs),
	const char _lhs = reinterpret_cast<const char >(lhs);			reinterpret_cast<const char *>(rhs), count);
	const char _rhs = reinterpret_cast<const char >(rhs);
	return aarch64::memcmp_impl(_lhs, _rhs, count);
	}			}

	} // namespace __llvm_libc			} // namespace __llvm_libc

libc/src/string/memory_utils/elements.h

Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines
// An alignment operation that:		// An alignment operation that:
// - executes the 'AlignmentT' operation		// - executes the 'AlignmentT' operation
// - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected		// - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
// pointer gets aligned, size is decreased accordingly.		// pointer gets aligned, size is decreased accordingly.
// - calls the 'NextT' operation.		// - calls the 'NextT' operation.
//		//
// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:		// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
// Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);		// Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
template <typename AlignmentT, Arg AlignOn> struct Align {		template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
private:		private:
static constexpr size_t Alignment = AlignmentT::kSize;		static constexpr size_t Alignment = AlignmentT::kSize;
static_assert(Alignment > 1, "Alignment must be more than 1");		static_assert(Alignment > 1, "Alignment must be more than 1");
static_assert(is_power2(Alignment), "Alignment must be a power of 2");		static_assert(is_power2(Alignment), "Alignment must be a power of 2");

public:		public:
template <typename NextT> struct Then {		template <typename NextT> struct Then {
static void Copy(char __restrict dst, const char __restrict src,		static void Copy(char __restrict dst, const char __restrict src,
Show All 21 Lines	static void SplatSet(char *dst, const unsigned char value, size_t size) {
AlignmentT::SplatSet(dst, value);		AlignmentT::SplatSet(dst, value);
char *dummy = nullptr;		char *dummy = nullptr;
internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);		internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
NextT::SplatSet(dst, value, size);		NextT::SplatSet(dst, value, size);
}		}
};		};
};		};

		// An operation that skips the first 'Bytes' bytes before delegating to 'NextT':
		// - every pointer argument is advanced by 'Bytes',
		// - for the runtime-sized overloads, 'size' is decreased by 'Bytes',
		// - the fixed-size overloads (no 'size' argument) only advance the pointers.
		//
		// e.g. Comparing everything past the first 32 bytes with a 16-byte loop:
		// ThreeWayCompare<Skip<32>::Then<Loop<_16>>>(lhs, rhs, count);
		//
		// Note: no check is performed that 'size >= Bytes'. 'size' is unsigned, so
		// the subtraction wraps around if the caller passes a smaller size.
		template <ptrdiff_t Bytes> struct Skip {
		  template <typename NextT> struct Then {
		    // Runtime-sized copy of the bytes located after the skipped prefix.
		    static void Copy(char *__restrict dst, const char *__restrict src,
		                     size_t size) {
		      NextT::Copy(dst + Bytes, src + Bytes, size - Bytes);
		    }

		    // Fixed-size copy starting 'Bytes' bytes into both buffers.
		    static void Copy(char *__restrict dst, const char *__restrict src) {
		      NextT::Copy(dst + Bytes, src + Bytes);
		    }

		    // Runtime-sized equality test of the bytes after the skipped prefix.
		    static bool Equals(const char *lhs, const char *rhs, size_t size) {
		      return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes);
		    }

		    // Fixed-size equality test starting 'Bytes' bytes into both buffers.
		    static bool Equals(const char *lhs, const char *rhs) {
		      return NextT::Equals(lhs + Bytes, rhs + Bytes);
		    }

		    // Runtime-sized three-way comparison of the bytes after the skipped
		    // prefix; the result is whatever 'NextT::ThreeWayCompare' returns.
		    static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
		      return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes);
		    }

		    // Fixed-size three-way comparison starting 'Bytes' bytes into both
		    // buffers.
		    static int ThreeWayCompare(const char *lhs, const char *rhs) {
		      return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes);
		    }

		    // Runtime-sized fill of the bytes located after the skipped prefix.
		    static void SplatSet(char *dst, const unsigned char value, size_t size) {
		      NextT::SplatSet(dst + Bytes, value, size - Bytes);
		    }

		    // Fixed-size fill starting 'Bytes' bytes into the buffer.
		    static void SplatSet(char *dst, const unsigned char value) {
		      NextT::SplatSet(dst + Bytes, value);
		    }
		  };
		};

// Fixed-size Builtin Operations		// Fixed-size Builtin Operations
// -----------------------------		// -----------------------------
// Note: Do not use 'builtin' right now as it requires the implementation of the		// Note: Do not use 'builtin' right now as it requires the implementation of the
// `_inline` versions of all the builtins. Theoretically, Clang can still turn		// `_inline` versions of all the builtins. Theoretically, Clang can still turn
// them into calls to the C library leading to reentrancy problems.		// them into calls to the C library leading to reentrancy problems.
namespace builtin {		namespace builtin {

#ifndef __has_builtin		#ifndef __has_builtin
▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[libc] rewrite aarch64 memcmp implementationClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 362774

libc/src/string/aarch64/memcmp.cpp

libc/src/string/memory_utils/elements.h

[libc] rewrite aarch64 memcmp implementation
ClosedPublic