Index: lib/Basic/Targets/X86.cpp =================================================================== --- lib/Basic/Targets/X86.cpp +++ lib/Basic/Targets/X86.cpp @@ -1081,6 +1081,9 @@ if (HasMWAITX) Builder.defineMacro("__MWAITX__"); + if (HasMOVBE) + Builder.defineMacro("__MOVBE__"); + switch (XOPLevel) { case XOP: Builder.defineMacro("__XOP__"); Index: lib/Headers/immintrin.h =================================================================== --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -306,6 +306,65 @@ #endif #endif /* __FSGSBASE__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) + +/* The structs used below are to force the load/store to be unaligned. This + * is accomplished with the __packed__ attribute. The __may_alias__ prevents + * tbaa metadata from being generated based on the struct and the type of the + * field inside of it. + */ + +static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i16(void const * __P) { + struct __loadu_i16 { + short __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i16(void * __P, short __D) { + struct __storeu_i16 { + short __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i32(void const * __P) { + struct __loadu_i32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i32(void * __P, int __D) { + struct __storeu_i32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_loadbe_i64(void const * __P) { + struct __loadu_i64 { + long long __v; + } __attribute__((__packed__, __may_alias__)); + return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) +_storebe_i64(void * __P, long long __D) { + struct __storeu_i64 { + long long __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); +} +#endif +#endif /* __MOVBE */ + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) #include #include Index: test/CodeGen/movbe-builtins.c =================================================================== --- /dev/null +++ test/CodeGen/movbe-builtins.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-X64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +movbe -emit-llvm -o - | FileCheck %s + + +#include + +short test_loadbe_i16(const short *P) { + // CHECK-LABEL: @test_loadbe_i16 + // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1 + // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]]) + return _loadbe_i16(P); +} + +void test_storebe_i16(short *P, short D) { + // CHECK-LABEL: @test_storebe_i16 + // CHECK: [[DATA:%.*]] = call i16 @llvm.bswap.i16(i16 %{{.*}}) + // CHECK: store i16 [[DATA]], i16* %{{.*}}, align 1 + _storebe_i16(P, D); +} + +int test_loadbe_i32(const int *P) { + // CHECK-LABEL: @test_loadbe_i32 + // CHECK: [[LOAD:%.*]] = load i32, i32* %{{.*}}, align 1 + // CHECK: call i32 @llvm.bswap.i32(i32 [[LOAD]]) + return _loadbe_i32(P); +} + +void test_storebe_i32(int *P, int D) { + // CHECK-LABEL: @test_storebe_i32 + // CHECK: [[DATA:%.*]] = call i32 @llvm.bswap.i32(i32 %{{.*}}) + // CHECK: store i32 [[DATA]], i32* %{{.*}}, align 1 + _storebe_i32(P, D); +} + +#ifdef __x86_64__ +long long test_loadbe_i64(const long long *P) { + // CHECK-X64-LABEL: @test_loadbe_i64 + // CHECK-X64: [[LOAD:%.*]] = load i64, i64* %{{.*}}, align 1 + // CHECK-X64: call i64 @llvm.bswap.i64(i64 [[LOAD]]) + return _loadbe_i64(P); +} + +void test_storebe_i64(long long *P, long long D) { + // CHECK-X64-LABEL: @test_storebe_i64 + // CHECK-X64: [[DATA:%.*]] = call i64 @llvm.bswap.i64(i64 %{{.*}}) + // CHECK-X64: store i64 [[DATA]], i64* %{{.*}}, align 1 + _storebe_i64(P, D); +} +#endif Index: test/Preprocessor/predefined-arch-macros.c =================================================================== --- test/Preprocessor/predefined-arch-macros.c +++ test/Preprocessor/predefined-arch-macros.c @@ -524,6 +524,7 @@ // CHECK_CORE_AVX2_M32: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M32: #define __MMX__ 1 +// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M32: #define __RDRND__ 1 @@ -554,6 +555,7 @@ // CHECK_CORE_AVX2_M64: #define __INVPCID__ 1 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 // CHECK_CORE_AVX2_M64: #define __MMX__ 1 +// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 // CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 // CHECK_CORE_AVX2_M64: #define __RDRND__ 1 @@ -588,6 +590,7 @@ // CHECK_BROADWELL_M32: #define __INVPCID__ 1 // CHECK_BROADWELL_M32: #define __LZCNT__ 1 // CHECK_BROADWELL_M32: #define __MMX__ 1 +// CHECK_BROADWELL_M32: #define __MOVBE__ 1 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1 // CHECK_BROADWELL_M32: #define __POPCNT__ 1 // CHECK_BROADWELL_M32: #define __PRFCHW__ 1 @@ -621,6 +624,7 @@ // CHECK_BROADWELL_M64: #define __INVPCID__ 1 // CHECK_BROADWELL_M64: #define __LZCNT__ 1 // CHECK_BROADWELL_M64: #define __MMX__ 1 +// CHECK_BROADWELL_M64: #define __MOVBE__ 1 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1 // CHECK_BROADWELL_M64: #define __POPCNT__ 1 // CHECK_BROADWELL_M64: #define __PRFCHW__ 1 @@ -659,6 +663,7 @@ // CHECK_SKL_M32: #define __INVPCID__ 1 // CHECK_SKL_M32: #define __LZCNT__ 1 // CHECK_SKL_M32: #define __MMX__ 1 +// CHECK_SKL_M32: #define __MOVBE__ 1 // CHECK_SKL_M32: #define __MPX__ 1 // CHECK_SKL_M32: #define __PCLMUL__ 1 // CHECK_SKL_M32: #define __POPCNT__ 1 @@ -694,6 +699,7 @@ // CHECK_SKL_M64: #define __INVPCID__ 1 // CHECK_SKL_M64: #define __LZCNT__ 1 // CHECK_SKL_M64: #define __MMX__ 1 +// CHECK_SKL_M64: #define __MOVBE__ 1 // CHECK_SKL_M64: #define __MPX__ 1 // CHECK_SKL_M64: #define __PCLMUL__ 1 // CHECK_SKL_M64: #define __POPCNT__ 1 @@ -735,6 +741,7 @@ // CHECK_KNL_M32: #define __FMA__ 1 // CHECK_KNL_M32: #define __LZCNT__ 1 // CHECK_KNL_M32: #define __MMX__ 1 +// CHECK_KNL_M32: #define __MOVBE__ 1 // CHECK_KNL_M32: #define __PCLMUL__ 1 // CHECK_KNL_M32: #define __POPCNT__ 1 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1 @@ -772,6 +779,7 @@ // CHECK_KNL_M64: #define __FMA__ 1 // CHECK_KNL_M64: #define __LZCNT__ 1 // CHECK_KNL_M64: #define __MMX__ 1 +// CHECK_KNL_M64: #define __MOVBE__ 1 // CHECK_KNL_M64: #define __PCLMUL__ 1 // CHECK_KNL_M64: #define __POPCNT__ 1 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1 @@ -813,6 +821,7 @@ // CHECK_KNM_M32: #define __FMA__ 1 // CHECK_KNM_M32: #define __LZCNT__ 1 // CHECK_KNM_M32: #define __MMX__ 1 +// CHECK_KNM_M32: #define __MOVBE__ 1 // CHECK_KNM_M32: #define __PCLMUL__ 1 // CHECK_KNM_M32: #define __POPCNT__ 1 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1 @@ -848,6 +857,7 @@ // CHECK_KNM_M64: #define __FMA__ 1 // CHECK_KNM_M64: #define __LZCNT__ 1 // CHECK_KNM_M64: #define __MMX__ 1 +// CHECK_KNM_M64: #define __MOVBE__ 1 // CHECK_KNM_M64: #define __PCLMUL__ 1 // CHECK_KNM_M64: #define __POPCNT__ 1 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1 @@ -889,6 +899,7 @@ // CHECK_SKX_M32: #define __INVPCID__ 1 // CHECK_SKX_M32: #define __LZCNT__ 1 // CHECK_SKX_M32: #define __MMX__ 1 +// CHECK_SKX_M32: #define __MOVBE__ 1 // CHECK_SKX_M32: #define __MPX__ 1 // CHECK_SKX_M32: #define __PCLMUL__ 1 // CHECK_SKX_M32: #define __PKU__ 1 @@ -935,6 +946,7 @@ // CHECK_SKX_M64: #define __INVPCID__ 1 // CHECK_SKX_M64: #define __LZCNT__ 1 // CHECK_SKX_M64: #define __MMX__ 1 +// CHECK_SKX_M64: #define __MOVBE__ 1 // CHECK_SKX_M64: #define __MPX__ 1 // CHECK_SKX_M64: #define __PCLMUL__ 1 // CHECK_SKX_M64: #define __PKU__ 1 @@ -986,6 +998,7 @@ // CHECK_CNL_M32: #define __INVPCID__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 // CHECK_CNL_M32: #define __MMX__ 1 +// CHECK_CNL_M32: #define __MOVBE__ 1 // CHECK_CNL_M32: #define __MPX__ 1 // CHECK_CNL_M32: #define __PCLMUL__ 1 // CHECK_CNL_M32: #define __PKU__ 1 @@ -1035,6 +1048,7 @@ // CHECK_CNL_M64: #define __INVPCID__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 // CHECK_CNL_M64: #define __MMX__ 1 +// CHECK_CNL_M64: #define __MOVBE__ 1 // CHECK_CNL_M64: #define __MPX__ 1 // CHECK_CNL_M64: #define __PCLMUL__ 1 // CHECK_CNL_M64: #define __PKU__ 1 @@ -1090,6 +1104,7 @@ // CHECK_ICL_M32: #define __INVPCID__ 1 // CHECK_ICL_M32: #define __LZCNT__ 1 // CHECK_ICL_M32: #define __MMX__ 1 +// CHECK_ICL_M32: #define __MOVBE__ 1 // CHECK_ICL_M32: #define __MPX__ 1 // CHECK_ICL_M32: #define __PCLMUL__ 1 // CHECK_ICL_M32: #define __PKU__ 1 @@ -1148,6 +1163,7 @@ // CHECK_ICL_M64: #define __INVPCID__ 1 // CHECK_ICL_M64: #define __LZCNT__ 1 // CHECK_ICL_M64: #define __MMX__ 1 +// CHECK_ICL_M64: #define __MOVBE__ 1 // CHECK_ICL_M64: #define __MPX__ 1 // CHECK_ICL_M64: #define __PCLMUL__ 1 // CHECK_ICL_M64: #define __PKU__ 1 @@ -1207,6 +1223,7 @@ // CHECK_ICX_M32: #define __INVPCID__ 1 // CHECK_ICX_M32: #define __LZCNT__ 1 // CHECK_ICX_M32: #define __MMX__ 1 +// CHECK_ICX_M32: #define __MOVBE__ 1 // CHECK_ICX_M32: #define __MPX__ 1 // CHECK_ICX_M32: #define __PCLMUL__ 1 // CHECK_ICX_M32: #define __PCONFIG__ 1 @@ -1266,6 +1283,7 @@ // CHECK_ICX_M64: #define __INVPCID__ 1 // CHECK_ICX_M64: #define __LZCNT__ 1 // CHECK_ICX_M64: #define __MMX__ 1 +// CHECK_ICX_M64: #define __MOVBE__ 1 // CHECK_ICX_M64: #define __MPX__ 1 // CHECK_ICX_M64: #define __PCLMUL__ 1 // CHECK_ICX_M64: #define __PCONFIG__ 1 @@ -1303,6 +1321,7 @@ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M32 // CHECK_ATOM_M32: #define __MMX__ 1 +// CHECK_ATOM_M32: #define __MOVBE__ 1 // CHECK_ATOM_M32: #define __SSE2__ 1 // CHECK_ATOM_M32: #define __SSE3__ 1 // CHECK_ATOM_M32: #define __SSE__ 1 @@ -1318,6 +1337,7 @@ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M64 // CHECK_ATOM_M64: #define __MMX__ 1 +// CHECK_ATOM_M64: #define __MOVBE__ 1 // CHECK_ATOM_M64: #define __SSE2_MATH__ 1 // CHECK_ATOM_M64: #define __SSE2__ 1 // CHECK_ATOM_M64: #define __SSE3__ 1 @@ -1340,6 +1360,7 @@ // CHECK_GLM_M32: #define __FSGSBASE__ 1 // CHECK_GLM_M32: #define __FXSR__ 1 // CHECK_GLM_M32: #define __MMX__ 1 +// CHECK_GLM_M32: #define __MOVBE__ 1 // CHECK_GLM_M32: #define __MPX__ 1 // CHECK_GLM_M32: #define __PCLMUL__ 1 // CHECK_GLM_M32: #define __POPCNT__ 1 @@ -1373,6 +1394,7 @@ // CHECK_GLM_M64: #define __FSGSBASE__ 1 // CHECK_GLM_M64: #define __FXSR__ 1 // CHECK_GLM_M64: #define __MMX__ 1 +// CHECK_GLM_M64: #define __MOVBE__ 1 // CHECK_GLM_M64: #define __MPX__ 1 // CHECK_GLM_M64: #define __PCLMUL__ 1 // CHECK_GLM_M64: #define __POPCNT__ 1 @@ -1404,6 +1426,7 @@ // CHECK_GLMP_M32: #define __FSGSBASE__ 1 // CHECK_GLMP_M32: #define __FXSR__ 1 // CHECK_GLMP_M32: #define __MMX__ 1 +// CHECK_GLMP_M32: #define __MOVBE__ 1 // CHECK_GLMP_M32: #define __MPX__ 1 // CHECK_GLMP_M32: #define __PCLMUL__ 1 // CHECK_GLMP_M32: #define __POPCNT__ 1 @@ -1440,6 +1463,7 @@ // CHECK_GLMP_M64: #define __FSGSBASE__ 1 // CHECK_GLMP_M64: #define __FXSR__ 1 // CHECK_GLMP_M64: #define __MMX__ 1 +// CHECK_GLMP_M64: #define __MOVBE__ 1 // CHECK_GLMP_M64: #define __MPX__ 1 // CHECK_GLMP_M64: #define __PCLMUL__ 1 // CHECK_GLMP_M64: #define __POPCNT__ 1 @@ -1476,6 +1500,7 @@ // CHECK_TRM_M32: #define __FXSR__ 1 // CHECK_TRM_M32: #define __GFNI__ 1 // CHECK_TRM_M32: #define __MMX__ 1 +// CHECK_TRM_M32: #define __MOVBE__ 1 // CHECK_TRM_M32: #define __MOVDIR64B__ 1 // CHECK_TRM_M32: #define __MOVDIRI__ 1 // CHECK_TRM_M32: #define __MPX__ 1 @@ -1517,6 +1542,7 @@ // CHECK_TRM_M64: #define __FXSR__ 1 // CHECK_TRM_M64: #define __GFNI__ 1 // CHECK_TRM_M64: #define __MMX__ 1 +// CHECK_TRM_M64: #define __MOVBE__ 1 // CHECK_TRM_M64: #define __MOVDIR64B__ 1 // CHECK_TRM_M64: #define __MOVDIRI__ 1 // CHECK_TRM_M64: #define __MPX__ 1 @@ -1551,6 +1577,7 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32 // CHECK_SLM_M32: #define __FXSR__ 1 // CHECK_SLM_M32: #define __MMX__ 1 +// CHECK_SLM_M32: #define __MOVBE__ 1 // CHECK_SLM_M32: #define __PCLMUL__ 1 // CHECK_SLM_M32: #define __POPCNT__ 1 // CHECK_SLM_M32: #define __PRFCHW__ 1 @@ -1573,6 +1600,7 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64 // CHECK_SLM_M64: #define __FXSR__ 1 // CHECK_SLM_M64: #define __MMX__ 1 +// CHECK_SLM_M64: #define __MOVBE__ 1 // CHECK_SLM_M64: #define __PCLMUL__ 1 // CHECK_SLM_M64: #define __POPCNT__ 1 // CHECK_SLM_M64: #define __PRFCHW__ 1 @@ -2134,6 +2162,7 @@ // CHECK_BTVER2_M32: #define __F16C__ 1 // CHECK_BTVER2_M32: #define __LZCNT__ 1 // CHECK_BTVER2_M32: #define __MMX__ 1 +// CHECK_BTVER2_M32: #define __MOVBE__ 1 // CHECK_BTVER2_M32: #define __PCLMUL__ 1 // CHECK_BTVER2_M32: #define __POPCNT__ 1 // CHECK_BTVER2_M32: #define __PRFCHW__ 1 @@ -2163,6 +2192,7 @@ // CHECK_BTVER2_M64: #define __F16C__ 1 // CHECK_BTVER2_M64: #define __LZCNT__ 1 // CHECK_BTVER2_M64: #define __MMX__ 1 +// CHECK_BTVER2_M64: #define __MOVBE__ 1 // CHECK_BTVER2_M64: #define __PCLMUL__ 1 // CHECK_BTVER2_M64: #define __POPCNT__ 1 // CHECK_BTVER2_M64: #define __PRFCHW__ 1 @@ -2491,6 +2521,7 @@ // CHECK_ZNVER1_M32: #define __FSGSBASE__ 1 // CHECK_ZNVER1_M32: #define __LZCNT__ 1 // CHECK_ZNVER1_M32: #define __MMX__ 1 +// CHECK_ZNVER1_M32: #define __MOVBE__ 1 // CHECK_ZNVER1_M32: #define __PCLMUL__ 1 // CHECK_ZNVER1_M32: #define __POPCNT__ 1 // CHECK_ZNVER1_M32: #define __PRFCHW__ 1 @@ -2534,6 +2565,7 @@ // CHECK_ZNVER1_M64: #define __FSGSBASE__ 1 // CHECK_ZNVER1_M64: #define __LZCNT__ 1 // CHECK_ZNVER1_M64: #define __MMX__ 1 +// CHECK_ZNVER1_M64: #define __MOVBE__ 1 // CHECK_ZNVER1_M64: #define __PCLMUL__ 1 // CHECK_ZNVER1_M64: #define __POPCNT__ 1 // CHECK_ZNVER1_M64: #define __PRFCHW__ 1