diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -3590,6 +3590,8 @@ .. option:: -mclzero, -mno-clzero +.. option:: -mcrc32, -mno-crc32 + .. option:: -mcx16, -mno-cx16 .. option:: -menqcmd, -mno-enqcmd diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -421,9 +421,9 @@ TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2") TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32") +TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32") +TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32") // SSE4a TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a") diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -44,7 +44,7 @@ TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2") TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32") TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", 
"fsgsbase") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4206,6 +4206,8 @@ def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group<m_x86_Features_Group>; def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>; def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>; +def mcrc32 : Flag<["-"], "mcrc32">, Group<m_x86_Features_Group>; +def mno_crc32 : Flag<["-"], "mno-crc32">, Group<m_x86_Features_Group>; def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>; def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>; def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -143,6 +143,7 @@ bool HasSERIALIZE = false; bool HasTSXLDTRK = false; bool HasUINTR = false; + bool HasCRC32 = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -155,6 +155,12 @@ llvm::find(UpdatedFeaturesVec, "-xsave") == UpdatedFeaturesVec.end()) Features["xsave"] = true; + // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled. + I = Features.find("sse4.2"); + if (I != Features.end() && I->getValue() && + llvm::find(UpdatedFeaturesVec, "-crc32") == UpdatedFeaturesVec.end()) + Features["crc32"] = true; + return true; } @@ -330,6 +336,8 @@ HasTSXLDTRK = true; } else if (Feature == "+uintr") { HasUINTR = true; + } else if (Feature == "+crc32") { + HasCRC32 = true; } X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature) @@ -758,6 +766,8 @@ Builder.defineMacro("__TSXLDTRK__"); if (HasUINTR) Builder.defineMacro("__UINTR__"); + if (HasCRC32) + Builder.defineMacro("__CRC32__"); // Each case falls through to the previous one here. 
switch (SSELevel) { @@ -878,6 +888,7 @@ .Case("clflushopt", true) .Case("clwb", true) .Case("clzero", true) + .Case("crc32", true) .Case("cx16", true) .Case("enqcmd", true) .Case("f16c", true) @@ -970,6 +981,7 @@ .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) + .Case("crc32", HasCRC32) .Case("cx8", HasCX8) .Case("cx16", HasCX16) .Case("enqcmd", HasENQCMD) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -58,6 +58,7 @@ cet.h cldemoteintrin.h clzerointrin.h + crc32intrin.h cpuid.h clflushoptintrin.h clwbintrin.h diff --git a/clang/lib/Headers/crc32intrin.h b/clang/lib/Headers/crc32intrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/crc32intrin.h @@ -0,0 +1,100 @@ +/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CRC32INTRIN_H +#define __CRC32INTRIN_H + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) + +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned char operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the CRC32B instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. 
+static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u8(unsigned int __C, unsigned char __D) +{ + return __builtin_ia32_crc32qi(__C, __D); +} + +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned short operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the CRC32W instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u16(unsigned int __C, unsigned short __D) +{ + return __builtin_ia32_crc32hi(__C, __D); +} + +/// Adds the first unsigned integer operand to the CRC-32C checksum of +/// the second unsigned integer operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the CRC32L instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u32(unsigned int __C, unsigned int __D) +{ + return __builtin_ia32_crc32si(__C, __D); +} + +#ifdef __x86_64__ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned 64-bit integer operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the CRC32Q instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. 
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_mm_crc32_u64(unsigned long long __C, unsigned long long __D) +{ + return __builtin_ia32_crc32di(__C, __D); +} +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __CRC32INTRIN_H */ diff --git a/clang/lib/Headers/ia32intrin.h b/clang/lib/Headers/ia32intrin.h --- a/clang/lib/Headers/ia32intrin.h +++ b/clang/lib/Headers/ia32intrin.h @@ -16,7 +16,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) -#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr @@ -282,7 +282,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); @@ -303,7 +303,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); @@ -324,7 +324,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); @@ -346,7 +346,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. 
*/ -static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); @@ -435,7 +435,7 @@ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CAST -#undef __DEFAULT_FN_ATTRS_SSE42 +#undef __DEFAULT_FN_ATTRS_CRC32 #undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __IA32INTRIN_H */ diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -2338,91 +2338,10 @@ return (__m128i)((__v2di)__V1 > (__v2di)__V2); } -/* SSE4.2 Accumulate CRC32. */ -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned char operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the CRC32B instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u8(unsigned int __C, unsigned char __D) -{ - return __builtin_ia32_crc32qi(__C, __D); -} - -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned short operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the CRC32W instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u16(unsigned int __C, unsigned short __D) -{ - return __builtin_ia32_crc32hi(__C, __D); -} - -/// Adds the first unsigned integer operand to the CRC-32C checksum of -/// the second unsigned integer operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the CRC32L instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u32(unsigned int __C, unsigned int __D) -{ - return __builtin_ia32_crc32si(__C, __D); -} - -#ifdef __x86_64__ -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned 64-bit integer operand. -/// -/// \headerfile <x86intrin.h> -/// -/// This intrinsic corresponds to the CRC32Q instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_mm_crc32_u64(unsigned long long __C, unsigned long long __D) -{ - return __builtin_ia32_crc32di(__C, __D); -} -#endif /* __x86_64__ */ - #undef __DEFAULT_FN_ATTRS #include <popcntintrin.h> +#include <crc32intrin.h> + #endif /* __SMMINTRIN_H */ diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h --- a/clang/lib/Headers/x86gprintrin.h +++ b/clang/lib/Headers/x86gprintrin.h @@ -20,6 +20,11 @@ #include <uintrintrin.h> #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CRC32__) +#include <crc32intrin.h> +#endif + #define __SSC_MARK(Tag) \ __asm__ __volatile__("movl %%ebx, %%eax; movl %0, %%ebx; .byte 0x64, 0x67, " \ "0x90; movl %%eax, %%ebx;" ::"i"(Tag) \ diff --git a/clang/test/CodeGen/X86/x86-crc-builtins.c b/clang/test/CodeGen/X86/x86-crc-builtins.c --- a/clang/test/CodeGen/X86/x86-crc-builtins.c +++ b/clang/test/CodeGen/X86/x86-crc-builtins.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64 // RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <x86intrin.h> @@ -28,3 +30,29 @@ return __crc32q(CRC, V); } #endif + +unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) { +// CHECK-LABEL: test_mm_crc32_u8 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + return _mm_crc32_u8(CRC, V); +} + +unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) { +// CHECK-LABEL: test_mm_crc32_u16 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}}) + return 
_mm_crc32_u16(CRC, V); +} + +unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) { +// CHECK-LABEL: test_mm_crc32_u32 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}}) + return _mm_crc32_u32(CRC, V); +} + +#ifdef __x86_64__ +unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) { +// CHECK64-LABEL: test_mm_crc32_u64 +// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}}) + return _mm_crc32_u64(CRC, V); +} +#endif diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -270,6 +270,6 @@ // WINDOWS: define dso_local i32 @DispatchFirst.B // WINDOWS: ret i32 1 -// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" diff --git a/clang/test/CodeGen/attr-target-crc32-x86.c b/clang/test/CodeGen/attr-target-crc32-x86.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-target-crc32-x86.c @@ -0,0 +1,55 @@ +// Test crc32 target attribute on x86 + +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s + +// CHECK: define{{.*}} i32 @test1({{.*}}) [[TEST1_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + +#define __MM_MALLOC_H + +#include <x86intrin.h> + +unsigned int __attribute__((target("crc32"))) test1(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test2({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("general-regs-only,crc32"))) test2(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test3({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("crc32,general-regs-only"))) test3(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test4({{.*}}) [[TEST4_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("sse4.2"))) test4(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test5({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("sse4.2,general-regs-only,crc32"))) test5(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test6({{.*}}) [[TEST4_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7) +int __attribute__((target("sse4.2,no-crc32,crc32"))) test6(__m128i A, int LA, __m128i B, int LB) { + return _mm_cmpestra(A, LA, B, LB, 7); +} + +// CHECK: define{{.*}} i32 @test7({{.*}}) [[TEST4_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 
x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7) +int __attribute__((target("no-crc32,crc32,sse4.2"))) test7(__m128i A, int LA, __m128i B, int LB) { + return _mm_cmpestra(A, LA, B, LB, 7); +} + +// CHECK: attributes [[TEST1_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}" +// CHECK: attributes [[GPR_ONLY_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// CHECK: attributes [[TEST4_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}+sse4.2{{.*}}" diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -52,12 +52,12 @@ // CHECK: use_before_def{{.*}} #7 // CHECK: walrus{{.*}} #8 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" -// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu // CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" -// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" +// CHECK: #3 = {{.*}}"target-cpu"="i686" 
"target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" @@ -65,8 +65,8 @@ // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge" // CHECK: "target-cpu"="x86-64-v2" -// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK-SAME: "target-features"="+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" // CHECK: "target-cpu"="x86-64-v3" -// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: "target-cpu"="x86-64-v4" -// CHECK-SAME: 
"target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" diff --git a/clang/test/Driver/x86-mcrc32.c b/clang/test/Driver/x86-mcrc32.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/x86-mcrc32.c @@ -0,0 +1,41 @@ +// Test interaction between -mcrc32 and other SIMD ISA options on x86 + +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s + +// RUN: not %clang -target 
i386-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s + +// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s + +// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s + +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +// RUN: %clang -target i386-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s + +unsigned int test__crc32b(unsigned int CRC, unsigned char V) { +// CHECK-LABEL: test__crc32b +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + return __builtin_ia32_crc32qi(CRC, V); +} + +// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32 + +// IR-CRC32: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}" diff --git a/clang/test/Driver/x86-mgeneral-regs-only-crc32.c b/clang/test/Driver/x86-mgeneral-regs-only-crc32.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/x86-mgeneral-regs-only-crc32.c @@ -0,0 +1,54 @@ +// Test the -mgeneral-regs-only with -mcrc32 option 
on x86 + +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s + +// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target 
i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: not %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -mcrc32 -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s 
+// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s + +// CMD-BEFORE: "-target-feature" "+crc32" +// CMD: "-target-feature" "-x87" +// CMD: "-target-feature" "-mmx" +// CMD: "-target-feature" "-sse" +// CMD-AFTER: "-target-feature" "+crc32" + +unsigned int test__crc32b(unsigned int CRC, unsigned char V) { +// CHECK-LABEL: test__crc32b +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + return __builtin_ia32_crc32qi(CRC, V); +} + +// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32 + +// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+crc32{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -298,3 +298,8 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512fp16 %s -### -o %t.o 2>&1 | FileCheck 
-check-prefix=NO-AVX512FP16 %s // AVX512FP16: "-target-feature" "+avx512fp16" // NO-AVX512FP16: "-target-feature" "-avx512fp16" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s +// CRC32: "-target-feature" "+crc32" +// NO-CRC32: "-target-feature" "-crc32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -580,3 +580,11 @@ // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s + +// CRC32: #define __CRC32__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s + +// NOCRC32-NOT: #define __CRC32__ 1 diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -158,6 +158,7 @@ X86_FEATURE (CLZERO, "clzero") X86_FEATURE (CMPXCHG16B, "cx16") X86_FEATURE (CMPXCHG8B, "cx8") +X86_FEATURE (CRC32, "crc32") X86_FEATURE (ENQCMD, "enqcmd") X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1071,8 +1071,10 @@ setFeature(X86::FEATURE_FMA); if ((ECX >> 19) & 1) setFeature(X86::FEATURE_SSE4_1); - if ((ECX >> 20) & 1) + if ((ECX >> 20) & 1) { setFeature(X86::FEATURE_SSE4_2); + setFeature(X86::FEATURE_CRC32); + } if ((ECX >> 23) & 1) setFeature(X86::FEATURE_POPCNT); if ((ECX >> 25) 
& 1) @@ -1518,6 +1520,7 @@ Features["cx16"] = (ECX >> 13) & 1; Features["sse4.1"] = (ECX >> 19) & 1; Features["sse4.2"] = (ECX >> 20) & 1; + Features["crc32"] = Features["sse4.2"]; Features["movbe"] = (ECX >> 22) & 1; Features["popcnt"] = (ECX >> 23) & 1; Features["aes"] = (ECX >> 25) & 1; diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -139,8 +139,8 @@ // Basic 64-bit capable CPU. constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT; constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF | - FeaturePOPCNT | FeatureSSE4_2 | - FeatureCMPXCHG16B; + FeaturePOPCNT | FeatureCRC32 | + FeatureSSE4_2 | FeatureCMPXCHG16B; constexpr FeatureBitset FeaturesX86_64_V3 = FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C | FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE; @@ -153,7 +153,7 @@ FeaturesNocona | FeatureSAHF | FeatureSSSE3; constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1; constexpr FeatureBitset FeaturesNehalem = - FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2; + FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2; constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL; constexpr FeatureBitset FeaturesSandyBridge = FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT; @@ -256,16 +256,17 @@ FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A | FeatureSAHF; constexpr FeatureBitset FeaturesBTVER2 = - FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C | - FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT; + FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 | + FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT; // AMD Bulldozer architecture processors. 
constexpr FeatureBitset FeaturesBDVER1 = FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B | - FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP | - FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW | - FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | - FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE; + FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 | + FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL | + FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 | + FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | + FeatureXOP | FeatureXSAVE; constexpr FeatureBitset FeaturesBDVER2 = FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM; constexpr FeatureBitset FeaturesBDVER3 = @@ -278,9 +279,9 @@ constexpr FeatureBitset FeaturesZNVER1 = FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO | - FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C | - FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | FeatureMMX | - FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT | + FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | + FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | + FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC | @@ -472,6 +473,7 @@ constexpr FeatureBitset ImpliedFeaturesCMOV = {}; constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {}; constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {}; +constexpr FeatureBitset ImpliedFeaturesCRC32 = {}; constexpr FeatureBitset ImpliedFeaturesENQCMD = {}; 
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {}; constexpr FeatureBitset ImpliedFeaturesFXSR = {}; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -42,6 +42,9 @@ def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true", "Support CMPXCHG8B instructions">; +def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true", + "Enable SSE 4.2 CRC32 instruction">; + def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; @@ -624,9 +627,10 @@ FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, Feature64Bit ]; - list X86_64V2Features = !listconcat( - X86_64V1Features, - [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]); + list X86_64V2Features = !listconcat(X86_64V1Features, [ + FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT, + FeatureSSE42 + ]); list X86_64V3Features = !listconcat(X86_64V2Features, [ FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, FeatureMOVBE, FeatureXSAVE @@ -867,6 +871,7 @@ // Silvermont list SLMAdditionalFeatures = [FeatureSSE42, + FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, @@ -957,6 +962,7 @@ FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, + FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, FeatureXSAVE, @@ -1033,6 +1039,7 @@ // Jaguar list BtVer2AdditionalFeatures = [FeatureAVX, FeatureAES, + FeatureCRC32, FeaturePCLMUL, FeatureBMI, FeatureF16C, @@ -1058,6 +1065,7 @@ Feature64Bit, FeatureCMPXCHG16B, FeatureAES, + FeatureCRC32, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, @@ -1115,6 +1123,7 @@ FeatureCMOV, Feature64Bit, FeatureCMPXCHG16B, + FeatureCRC32, FeatureF16C, FeatureFMA, FeatureFSGSBase, diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -750,18 +750,19 
@@ : I, T8PD, Requires<[UseSSE42]>; -// SS42FI - SSE 4.2 instructions with T8XD prefix. -// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. -class SS42FI o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, T8XD, Requires<[HasSSE42]>; - // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TAPD, Requires<[UseSSE42]>; +// CRC32I - SSE 4.2 CRC32 instructions. +// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly +// controlled by the SSE42 flag. +class CRC32I o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, T8XD, Requires<[HasCRC32]>; + // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -995,6 +995,7 @@ def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">; def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def HasUINTR : Predicate<"Subtarget->hasUINTR()">; +def HasCRC32 : Predicate<"Subtarget->hasCRC32()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6588,14 +6588,14 @@ // of r and m. 
class SS42I_crc32r opc, string asm, RegisterClass RCOut, RegisterClass RCIn, SDPatternOperator Int> : - SS42FI, Sched<[WriteCRC32]>; class SS42I_crc32m opc, string asm, RegisterClass RCOut, X86MemOperand x86memop, SDPatternOperator Int> : - SS42FI, Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -428,6 +428,10 @@ /// Processor supports User Level Interrupt instructions bool HasUINTR = false; + /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but + /// function is GPR only) + bool HasCRC32 = false; + /// Processor has a single uop BEXTR implementation. bool HasFastBEXTR = false; @@ -767,6 +771,7 @@ bool hasSERIALIZE() const { return HasSERIALIZE; } bool hasTSXLDTRK() const { return HasTSXLDTRK; } bool hasUINTR() const { return HasUINTR; } + bool hasCRC32() const { return HasCRC32; } bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; } bool useRetpolineIndirectBranches() const { return UseRetpolineIndirectBranches; diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll rename from llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll rename to llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll --- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown 
-mattr=-sse4.2,+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64 + +define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { +; X86-LABEL: crc32_32_8: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_8: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind + +define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { +; X86-LABEL: crc32_32_16: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_16: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 
@llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind + +define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { +; X86-LABEL: crc32_32_32: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_32: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll rename from llvm/test/CodeGen/X86/sse42-intrinsics-x86_64.ll rename to llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll --- a/llvm/test/CodeGen/X86/sse42-intrinsics-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind @@ -25,4 +23,3 @@ %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp } - diff --git a/llvm/test/CodeGen/X86/crc32-target-feature.ll b/llvm/test/CodeGen/X86/crc32-target-feature.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/crc32-target-feature.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @test1(i32 %a, i8 %b) nounwind #0 { +; CHECK-LABEL: test1: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test2(i32 %a, i8 %b) nounwind #1 { +; CHECK-LABEL: test2: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test3(i32 %a, i8 %b) nounwind #2 { +; CHECK-LABEL: test3: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind + +attributes #0 = { "target-features"="+crc32" } +attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" } +attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" } diff --git a/llvm/test/CodeGen/X86/function-subtarget-features.ll b/llvm/test/CodeGen/X86/function-subtarget-features.ll --- a/llvm/test/CodeGen/X86/function-subtarget-features.ll +++ b/llvm/test/CodeGen/X86/function-subtarget-features.ll @@ -78,4 +78,4 @@ attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" } attributes #1 = { "target-cpu"="x86-64" } attributes #2 = { 
"target-cpu"="corei7" "target-features"="+sse4.2" } -attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" } +attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes,+crc32" } diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll --- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE -; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX1 -; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX512 -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX1 -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX512 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X86-SSE +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X86-SSE +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX1 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX512 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X64-SSE +; RUN: llc < 
%s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X64-SSE +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX1 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX512 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -500,54 +502,3 @@ ret i32 %res } declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone - -define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) { -; X86-LABEL: test_mm_crc32_u8: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: test_mm_crc32_u8: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32b %sil, %eax -; X64-NEXT: retq - %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) - ret i32 %res -} -declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone - -define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) { -; X86-LABEL: test_mm_crc32_u16: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: test_mm_crc32_u16: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32w %si, %eax -; X64-NEXT: retq - %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) - ret i32 %res -} -declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone - -define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) { -; X86-LABEL: test_mm_crc32_u32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: test_mm_crc32_u32: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32l %esi, %eax -; X64-NEXT: retq - %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) - ret i32 %res -} 
-declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll --- a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X86,SSE,X86-SSE -; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX1 -; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX512 -; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X64,SSE,X64-SSE -; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX1 -; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX512 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX1 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX512 +; RUN: llc < %s -disable-peephole 
-mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX1 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX512 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { ; SSE-LABEL: test_x86_sse42_pcmpestri128: @@ -616,54 +618,3 @@ %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } - -define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { -; X86-LABEL: crc32_32_8: -; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] -; -; X64-LABEL: crc32_32_8: -; X64: ## %bb.0: -; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] -; X64-NEXT: retq ## encoding: [0xc3] - %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) - ret i32 %tmp -} -declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind - -define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { -; X86-LABEL: crc32_32_16: -; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] -; -; X64-LABEL: crc32_32_16: -; X64: ## %bb.0: -; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] -; 
X64-NEXT: retq ## encoding: [0xc3] - %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) - ret i32 %tmp -} -declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind - -define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { -; X86-LABEL: crc32_32_32: -; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] -; -; X64-LABEL: crc32_32_32: -; X64: ## %bb.0: -; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] -; X64-NEXT: retq ## encoding: [0xc3] - %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) - ret i32 %tmp -} -declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind diff --git a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll --- a/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-sse42.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s +; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+crc32,+pclmul < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown"