This is an archive of the discontinued LLVM Phabricator instance.

[X86] Expose the various _rot intrinsics on non-MS platforms
ClosedPublic

Authored by mkuper on Aug 23 2015, 6:38 AM.

Download Raw Diff

Details

Reviewers

majnemer
rnk

Commits

rG2c8f9c2c23e0: [X86] Expose the various _rot intrinsics on non-MS platforms
rC245923: [X86] Expose the various _rot intrinsics on non-MS platforms
rL245923: [X86] Expose the various _rot intrinsics on non-MS platforms

Summary

_rotl, _rotwl and _lrotl (and their right-rotate counterparts _rotr, _rotwr and _lrotr) are official x86 intrinsics, and should be supported regardless of environment.
This is in contrast to _rotl8, _rotl16, and _rotl64 which are MS-specific.

Note that the MS documentation for _lrotl is different from the Intel documentation. Intel explicitly documents it as a 64-bit rotate, while for MS, since sizeof(unsigned long) for MSVC is 4, a 32-bit rotate is clearly implied.
Compare:
https://msdn.microsoft.com/en-us/library/a0w705h5.aspx
vs.
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=rot&techs=Other&expand=3193

Note that this doesn't change the implementations of these intrinsics, which are currently pretty awful.
We only manage to match the 32-bit versions to a rotate, and even then, still have the "and" and the control flow in place. That should be dealt with separately.

Diff Detail

Repository: rL LLVM

Event Timeline

mkuper updated this revision to Diff 32925. · Aug 23 2015, 6:38 AM

mkuper retitled this revision (previously untitled) to [X86] Expose the various _rot intrinsics on non-MS platforms.

mkuper updated this object.

mkuper added reviewers: majnemer, rnk.

mkuper added a subscriber: cfe-commits.

This looks good.

As a larger issue, LLVM fast isel definitely won't pattern match this series of shifts and selects to rotl at -O0. There are some users who want branchless constant time rotates regardless of optimization level (https://llvm.org/bugs/show_bug.cgi?id=24226). My thinking is that the shifts are more analyzable to LLVM than an intrinsic, so we should leave these intrinsics alone and tell such users to use inline asm if they need these kinds of low-level guarantees. It still isn't very satisfactory. =/

test/CodeGen/x86-rot-intrinsics.c
9–11 ↗	(On Diff #32925)	Any reason not to use -ffreestanding to deal with this on the Linux side of the test like we do for windows?

This revision is now accepted and ready to land. · Aug 24 2015, 9:14 AM

In D12271#231204, @rnk wrote:

This looks good.

As a larger issue, LLVM fast isel definitely won't pattern match this series of shifts and selects to rotl at -O0. There are some users who want branchless constant time rotates regardless of optimization level (https://llvm.org/bugs/show_bug.cgi?id=24226). My thinking is that the shifts are more analyzable to LLVM than an intrinsic, so we should leave these intrinsics alone and tell such users to use inline asm if they need these kinds of low-level guarantees. It still isn't very satisfactory. =/

I don't remember anything in PR24226 about "regardless of optimization level". In particular, code with "constant time" requirements won't tolerate -O0 codegen anyway because of the large amount of memory accesses into the stack -- the timing of such operations will depend on nasty things like cache line aliasing and such, which has similar problems to branches.

Closed by commit rL245923: [X86] Expose the various _rot intrinsics on non-MS platforms (authored by mkuper). · Explain Why · Aug 25 2015, 12:22 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

cfe/

trunk/

lib/

Headers/

Intrin.h

20 lines

immintrin.h

54 lines

test/

CodeGen/

x86-rot-intrinsics.c

89 lines

Diff 33050

cfe/trunk/lib/Headers/Intrin.h

Show First 20 Lines • Show All 457 Lines • ▼ Show 20 Lines	_rotl16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;		_Shift &= 0xf;
return _Shift ? (_Value << _Shift) \| (_Value >> (16 - _Shift)) : _Value;		return _Shift ? (_Value << _Shift) \| (_Value >> (16 - _Shift)) : _Value;
}		}
static __inline__ unsigned short __DEFAULT_FN_ATTRS		static __inline__ unsigned short __DEFAULT_FN_ATTRS
_rotr16(unsigned short _Value, unsigned char _Shift) {		_rotr16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;		_Shift &= 0xf;
return _Shift ? (_Value >> _Shift) \| (_Value << (16 - _Shift)) : _Value;		return _Shift ? (_Value >> _Shift) \| (_Value << (16 - _Shift)) : _Value;
}		}
/*
 * Rotate the 32-bit value _Value left by _Shift bit positions.
 * Only the low five bits of _Shift are used, i.e. the count is taken modulo 32.
 * A zero count returns _Value unchanged, which also keeps the complementary
 * shift (32 - _Shift) strictly below the operand width.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_rotl(unsigned int _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
/*
 * Rotate the 32-bit value _Value right by _Shift bit positions.
 * Only the low five bits of _Shift are used, i.e. the count is taken modulo 32.
 * A zero count returns _Value unchanged, which also keeps the complementary
 * shift (32 - _Shift) strictly below the operand width.
 */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_rotr(unsigned int _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
/*
 * Rotate the unsigned long _Value left by _Shift bit positions, treating it as
 * a 32-bit quantity (count taken modulo 32).
 * NOTE(review): the hard-coded 32 assumes unsigned long is 32 bits wide, as it
 * is under MSVC; confirm this header is never used where long is wider.
 */
static __inline__ unsigned long __DEFAULT_FN_ATTRS
_lrotl(unsigned long _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
/*
 * Rotate the unsigned long _Value right by _Shift bit positions, treating it
 * as a 32-bit quantity (count taken modulo 32).
 * NOTE(review): the hard-coded 32 assumes unsigned long is 32 bits wide, as it
 * is under MSVC; confirm this header is never used where long is wider.
 */
static __inline__ unsigned long __DEFAULT_FN_ATTRS
_lrotr(unsigned long _Value, int _Shift) {
  _Shift &= 0x1f;
  return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static		static
__inline__ unsigned __int64 __DEFAULT_FN_ATTRS		__inline__ unsigned __int64 __DEFAULT_FN_ATTRS
_rotl64(unsigned __int64 _Value, int _Shift) {		_rotl64(unsigned __int64 _Value, int _Shift) {
_Shift &= 0x3f;		_Shift &= 0x3f;
return _Shift ? (_Value << _Shift) \| (_Value >> (64 - _Shift)) : _Value;		return _Shift ? (_Value << _Shift) \| (_Value >> (64 - _Shift)) : _Value;
}		}
static		static
__inline__ unsigned __int64 __DEFAULT_FN_ATTRS		__inline__ unsigned __int64 __DEFAULT_FN_ATTRS
▲ Show 20 Lines • Show All 463 Lines • Show Last 20 Lines

cfe/trunk/lib/Headers/immintrin.h

	Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines
	#include <shaintrin.h>			#include <shaintrin.h>

	#include <fxsrintrin.h>			#include <fxsrintrin.h>

	/* Some intrinsics inside adxintrin.h are available only on processors with ADX,			/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
	* whereas others are also available at all times. */			* whereas others are also available at all times. */
	#include <adxintrin.h>			#include <adxintrin.h>

				/*
				 * Rotate the 16-bit value _Value left by _Shift bit positions.
				 * Only the low four bits of _Shift are used, i.e. the count is taken
				 * modulo 16; a zero count returns _Value unchanged.
				 * _Value is promoted to int for the shifts and the result is narrowed
				 * back to unsigned short on return, discarding bits above bit 15.
				 */
				static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
				_rotwl(unsigned short _Value, int _Shift) {
				  _Shift &= 0xf;
				  return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
				}

				/*
				 * Rotate the 16-bit value _Value right by _Shift bit positions.
				 * Only the low four bits of _Shift are used, i.e. the count is taken
				 * modulo 16; a zero count returns _Value unchanged.
				 * _Value is promoted to int for the shifts and the result is narrowed
				 * back to unsigned short on return, discarding bits above bit 15.
				 */
				static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
				_rotwr(unsigned short _Value, int _Shift) {
				  _Shift &= 0xf;
				  return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
				}

				/*
				 * Rotate the 32-bit value _Value left by _Shift bit positions.
				 * Only the low five bits of _Shift are used, i.e. the count is taken
				 * modulo 32. A zero count returns _Value unchanged, which also keeps the
				 * complementary shift (32 - _Shift) strictly below the operand width.
				 */
				static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
				_rotl(unsigned int _Value, int _Shift) {
				  _Shift &= 0x1f;
				  return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
				}

				/*
				 * Rotate the 32-bit value _Value right by _Shift bit positions.
				 * Only the low five bits of _Shift are used, i.e. the count is taken
				 * modulo 32. A zero count returns _Value unchanged, which also keeps the
				 * complementary shift (32 - _Shift) strictly below the operand width.
				 */
				static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
				_rotr(unsigned int _Value, int _Shift) {
				  _Shift &= 0x1f;
				  return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
				}

				/*
				 * _lrotl/_lrotr rotate an unsigned long, so the rotate width must follow
				 * the width of that type on the current target: 32 bits under MSVC (where
				 * unsigned long is always 32-bit) and on other ILP32 targets, 64 bits on
				 * LP64 targets. Hard-coding 64 for every non-MS target would shift a
				 * 32-bit unsigned long by up to 63 on targets such as i686 Linux, which
				 * is undefined behaviour. Deriving the width from sizeof covers all of
				 * these cases with a single definition.
				 */
				static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
				_lrotl(unsigned long _Value, int _Shift) {
				  /* x86 bytes are 8 bits wide, so this is the bit width of unsigned long. */
				  enum { __lrot_width = sizeof(unsigned long) * 8 };
				  /* Reduce the count modulo the width (the width is a power of two). */
				  _Shift &= __lrot_width - 1;
				  /* A zero count must not reach the shift by the full width below. */
				  return _Shift ? (_Value << _Shift) | (_Value >> (__lrot_width - _Shift))
				                : _Value;
				}

				static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
				_lrotr(unsigned long _Value, int _Shift) {
				  /* x86 bytes are 8 bits wide, so this is the bit width of unsigned long. */
				  enum { __lrot_width = sizeof(unsigned long) * 8 };
				  /* Reduce the count modulo the width (the width is a power of two). */
				  _Shift &= __lrot_width - 1;
				  /* A zero count must not reach the shift by the full width below. */
				  return _Shift ? (_Value >> _Shift) | (_Value << (__lrot_width - _Shift))
				                : _Value;
				}

	#endif /* __IMMINTRIN_H */			#endif /* __IMMINTRIN_H */

cfe/trunk/test/CodeGen/x86-rot-intrinsics.c

Property	Old Value	New Value
svn:eol-style	null	native
svn:keywords	null	Author Date Id Rev URL
svn:mime-type	null	text/plain

				// RUN: %clang_cc1 %s -triple=i686-pc-linux -emit-llvm -o - | FileCheck %s
				// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
				// RUN: -triple i686--windows -emit-llvm %s -o - \
				// RUN: | FileCheck %s -check-prefix CHECK -check-prefix MSC

				// Don't include mm_malloc.h, it's system specific.
				#define __MM_MALLOC_H

				#ifdef _MSC_VER
				#include <Intrin.h>
				#else
				#include <immintrin.h>
				#endif

				// MS-only rotate intrinsics (_rotl8/_rotr8, _rotl16/_rotr16, _rotl64/_rotr64).
				// These wrappers are compiled only when _MSC_VER is defined, and their
				// directives use the MSC prefix, which only the Windows RUN line passes to
				// FileCheck. Each wrapper's directives require its label to appear in the
				// output and forbid a call between that label and the next one.
				#ifdef _MSC_VER
				unsigned char test_rotl8(unsigned char v, unsigned char s) {
				//MSC-LABEL: test_rotl8
				//MSC-NOT: call
				return _rotl8(v, s);
				}

				unsigned char test_rotr8(unsigned char v, unsigned char s) {
				//MSC-LABEL: test_rotr8
				//MSC-NOT: call
				return _rotr8(v, s);
				}

				unsigned short test_rotl16(unsigned short v, unsigned char s) {
				//MSC-LABEL: test_rotl16
				//MSC-NOT: call
				return _rotl16(v, s);
				}

				unsigned short test_rotr16(unsigned short v, unsigned char s) {
				//MSC-LABEL: test_rotr16
				//MSC-NOT: call
				return _rotr16(v, s);
				}

				unsigned __int64 test_rotl64(unsigned __int64 v, int s) {
				//MSC-LABEL: test_rotl64
				//MSC-NOT: call
				return _rotl64(v, s);
				}

				unsigned __int64 test_rotr64(unsigned __int64 v, int s) {
				//MSC-LABEL: test_rotr64
				//MSC-NOT: call
				return _rotr64(v, s);
				}
				#endif

				// Rotate intrinsics expected on every target (_rotwl/_rotwr, _rotl/_rotr,
				// _lrotl/_lrotr). Their directives use the default prefix, so both RUN lines
				// verify them. Each wrapper's directives require its label to appear in the
				// output and forbid a call between that label and the next one.
				// NOTE(review): these checks only assert the absence of a call; they do not
				// verify that a rotate of the expected width is actually produced.
				unsigned short test_rotwl(unsigned short v, unsigned short s) {
				//CHECK-LABEL: test_rotwl
				//CHECK-NOT: call
				return _rotwl(v, s);
				}

				unsigned short test_rotwr(unsigned short v, unsigned short s) {
				//CHECK-LABEL: test_rotwr
				//CHECK-NOT: call
				return _rotwr(v, s);
				}

				unsigned int test_rotl(unsigned int v, int s) {
				//CHECK-LABEL: test_rotl
				//CHECK-NOT: call
				return _rotl(v, s);
				}

				unsigned int test_rotr(unsigned int v, int s) {
				//CHECK-LABEL: test_rotr
				//CHECK-NOT: call
				return _rotr(v, s);
				}

				unsigned long test_lrotl(unsigned long v, int s) {
				//CHECK-LABEL: test_lrotl
				//CHECK-NOT: call
				return _lrotl(v, s);
				}

				unsigned long test_lrotr(unsigned long v, int s) {
				//CHECK-LABEL: test_lrotr
				//CHECK-NOT: call
				return _lrotr(v, s);
				}

				//CHECK-LABEL: attributes