Index: clang/docs/ClangCommandLineReference.rst =================================================================== --- clang/docs/ClangCommandLineReference.rst +++ clang/docs/ClangCommandLineReference.rst @@ -3569,6 +3569,8 @@ .. option:: -mclzero, -mno-clzero +.. option:: -mcrc32, -mno-crc32 + .. option:: -mcx16, -mno-cx16 .. option:: -menqcmd, -mno-enqcmd Index: clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -421,9 +421,9 @@ TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2") TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32") +TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32") +TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32") // SSE4a TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a") Index: clang/include/clang/Basic/BuiltinsX86_64.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86_64.def +++ clang/include/clang/Basic/BuiltinsX86_64.def @@ -44,7 +44,7 @@ TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2") TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32") TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -4149,6 +4149,8 @@ def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group; def mclzero : Flag<["-"], "mclzero">, Group; def mno_clzero : Flag<["-"], "mno-clzero">, Group; +def mcrc32 : Flag<["-"], "mcrc32">, Group; +def mno_crc32 : Flag<["-"], "mno-crc32">, Group; def mcx16 : Flag<["-"], "mcx16">, Group; def mno_cx16 : Flag<["-"], "mno-cx16">, Group; def menqcmd : Flag<["-"], "menqcmd">, Group; Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -142,6 +142,7 @@ bool HasSERIALIZE = false; bool HasTSXLDTRK = false; bool HasUINTR = false; + bool HasCRC32 = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -327,6 +327,8 @@ HasTSXLDTRK = true; } else if (Feature == "+uintr") { HasUINTR = true; + } else if (Feature == "+crc32") { + HasCRC32 = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -753,6 +755,8 @@ Builder.defineMacro("__TSXLDTRK__"); if (HasUINTR) Builder.defineMacro("__UINTR__"); + if (HasCRC32) + Builder.defineMacro("__CRC32__"); // Each case falls through to the previous one here. switch (SSELevel) { @@ -872,6 +876,7 @@ .Case("clflushopt", true) .Case("clwb", true) .Case("clzero", true) + .Case("crc32", true) .Case("cx16", true) .Case("enqcmd", true) .Case("f16c", true) @@ -963,6 +968,7 @@ .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) + .Case("crc32", HasCRC32) .Case("cx8", HasCX8) .Case("cx16", HasCX16) .Case("enqcmd", HasENQCMD) Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -55,6 +55,7 @@ cet.h cldemoteintrin.h clzerointrin.h + crc32intrin.h cpuid.h clflushoptintrin.h clwbintrin.h Index: clang/lib/Headers/crc32intrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/crc32intrin.h @@ -0,0 +1,100 @@ +/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CRC32INTRIN_H +#define __CRC32INTRIN_H + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) + +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned char operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CRC32B instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u8(unsigned int __C, unsigned char __D) +{ + return __builtin_ia32_crc32qi(__C, __D); +} + +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned short operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CRC32W instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u16(unsigned int __C, unsigned short __D) +{ + return __builtin_ia32_crc32hi(__C, __D); +} + +/// Adds the first unsigned integer operand to the CRC-32C checksum of +/// the second unsigned integer operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CRC32L instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u32(unsigned int __C, unsigned int __D) +{ + return __builtin_ia32_crc32si(__C, __D); +} + +#ifdef __x86_64__ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned 64-bit integer operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CRC32Q instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_mm_crc32_u64(unsigned long long __C, unsigned long long __D) +{ + return __builtin_ia32_crc32di(__C, __D); +} +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __CRC32INTRIN_H */ Index: clang/lib/Headers/ia32intrin.h =================================================================== --- clang/lib/Headers/ia32intrin.h +++ clang/lib/Headers/ia32intrin.h @@ -17,6 +17,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr @@ -282,7 +283,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); @@ -303,7 +304,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); @@ -324,7 +325,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); @@ -346,7 +347,7 @@ * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); @@ -435,6 +436,7 @@ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CAST +#undef __DEFAULT_FN_ATTRS_CRC32 #undef __DEFAULT_FN_ATTRS_SSE42 #undef __DEFAULT_FN_ATTRS_CONSTEXPR Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -513,6 +513,11 @@ #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CRC32__) +#include +#endif + #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) Index: clang/lib/Headers/smmintrin.h =================================================================== --- clang/lib/Headers/smmintrin.h +++ clang/lib/Headers/smmintrin.h @@ -2340,91 +2340,10 @@ return (__m128i)((__v2di)__V1 > (__v2di)__V2); } -/* SSE4.2 Accumulate CRC32. */ -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned char operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the CRC32B instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u8(unsigned int __C, unsigned char __D) -{ - return __builtin_ia32_crc32qi(__C, __D); -} - -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned short operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the CRC32W instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u16(unsigned int __C, unsigned short __D) -{ - return __builtin_ia32_crc32hi(__C, __D); -} - -/// Adds the first unsigned integer operand to the CRC-32C checksum of -/// the second unsigned integer operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the CRC32L instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mm_crc32_u32(unsigned int __C, unsigned int __D) -{ - return __builtin_ia32_crc32si(__C, __D); -} - -#ifdef __x86_64__ -/// Adds the unsigned integer operand to the CRC-32C checksum of the -/// unsigned 64-bit integer operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the CRC32Q instruction. -/// -/// \param __C -/// An unsigned integer operand to add to the CRC-32C checksum of operand -/// \a __D. -/// \param __D -/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. -/// \returns The result of adding operand \a __C to the CRC-32C checksum of -/// operand \a __D. -static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_mm_crc32_u64(unsigned long long __C, unsigned long long __D) -{ - return __builtin_ia32_crc32di(__C, __D); -} -#endif /* __x86_64__ */ - #undef __DEFAULT_FN_ATTRS #include +#include + #endif /* __SMMINTRIN_H */ Index: clang/test/CodeGen/X86/x86-crc-builtins.c =================================================================== --- clang/test/CodeGen/X86/x86-crc-builtins.c +++ clang/test/CodeGen/X86/x86-crc-builtins.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64 // RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s #include @@ -28,3 +30,29 @@ return __crc32q(CRC, V); } #endif + +unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) { +// CHECK-LABEL: test_mm_crc32_u8 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + return _mm_crc32_u8(CRC, V); +} + +unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) { +// CHECK-LABEL: test_mm_crc32_u16 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}}) + return _mm_crc32_u16(CRC, V); +} + +unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) { +// CHECK-LABEL: test_mm_crc32_u32 +// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}}) + return _mm_crc32_u32(CRC, V); +} + +#ifdef __x86_64__ +unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) { +// CHECK64-LABEL: test_mm_crc32_u64 +// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}}) + return _mm_crc32_u64(CRC, V); +} +#endif Index: clang/test/CodeGen/attr-cpuspecific.c =================================================================== --- clang/test/CodeGen/attr-cpuspecific.c +++ clang/test/CodeGen/attr-cpuspecific.c @@ -270,6 +270,6 @@ // WINDOWS: define dso_local i32 @DispatchFirst.B // WINDOWS: ret i32 1 -// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" Index: clang/test/CodeGen/attr-target-crc32-x86.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-target-crc32-x86.c @@ -0,0 +1,38 @@ +// Test crc32 target attribute on x86 + +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s + +// CHECK: define{{.*}} i32 @test1({{.*}}) [[TEST1_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("crc32"))) test1(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test2({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("general-regs-only,crc32"))) test2(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test3({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("crc32,general-regs-only"))) test3(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test4({{.*}}) [[TEST4_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("sse4.2"))) test4(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: define{{.*}} i32 @test5({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]] +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) +unsigned int __attribute__((target("sse4.2,general-regs-only"))) test5(unsigned int CRC, unsigned char V) { + return __builtin_ia32_crc32qi(CRC, V); +} + +// CHECK: attributes [[TEST1_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}" +// CHECK: attributes [[GPR_ONLY_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// CHECK: attributes [[TEST4_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}+sse4.2{{.*}}" Index: clang/test/CodeGen/attr-target-x86.c =================================================================== --- clang/test/CodeGen/attr-target-x86.c +++ clang/test/CodeGen/attr-target-x86.c @@ -52,12 +52,12 @@ // CHECK: use_before_def{{.*}} #7 // CHECK: walrus{{.*}} #8 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" -// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu // CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" -// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" +// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" @@ -65,8 +65,8 @@ // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge" // CHECK: "target-cpu"="x86-64-v2" -// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK-SAME: "target-features"="+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" // CHECK: "target-cpu"="x86-64-v3" -// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: "target-cpu"="x86-64-v4" -// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" Index: clang/test/Driver/x86-mgeneral-regs-only-crc32.c =================================================================== --- /dev/null +++ clang/test/Driver/x86-mgeneral-regs-only-crc32.c @@ -0,0 +1,50 @@ +// Test the -mgeneral-regs-only with -mcrc32 option on x86 + +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s + +// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s +// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target i386-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s + +// CMD-BEFORE: "-target-feature" "+crc32" +// CMD: "-target-feature" "-x87" +// CMD: "-target-feature" "-mmx" +// CMD: "-target-feature" "-sse" +// CMD-AFTER: "-target-feature" "+crc32" + +unsigned int test__crc32b(unsigned int CRC, unsigned char V) { +// CHECK-LABEL: test__crc32b +// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}}) + return __builtin_ia32_crc32qi(CRC, V); +} + +// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32 + +// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+crc32{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}" Index: clang/test/Driver/x86-target-features.c =================================================================== --- clang/test/Driver/x86-target-features.c +++ clang/test/Driver/x86-target-features.c @@ -293,3 +293,8 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s // AVX-VNNI: "-target-feature" "+avxvnni" // NO-AVX-VNNI: "-target-feature" "-avxvnni" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s +// CRC32: "-target-feature" "+crc32" +// NO-CRC32: "-target-feature" "-crc32" Index: clang/test/Preprocessor/x86_target_features.c =================================================================== --- clang/test/Preprocessor/x86_target_features.c +++ clang/test/Preprocessor/x86_target_features.c @@ -558,3 +558,11 @@ // AVXVNNINOAVX2-NOT: #define __AVX2__ 1 // AVXVNNINOAVX2-NOT: #define __AVXVNNI__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s + +// CRC32: #define __CRC32__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s + +// NOCRC32-NOT: #define __CRC32__ 1 Index: llvm/include/llvm/Support/X86TargetParser.def =================================================================== --- llvm/include/llvm/Support/X86TargetParser.def +++ llvm/include/llvm/Support/X86TargetParser.def @@ -153,6 +153,7 @@ X86_FEATURE (CLZERO, "clzero") X86_FEATURE (CMPXCHG16B, "cx16") X86_FEATURE (CMPXCHG8B, "cx8") +X86_FEATURE (CRC32, "crc32") X86_FEATURE (ENQCMD, "enqcmd") X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") Index: llvm/lib/Support/Host.cpp =================================================================== --- llvm/lib/Support/Host.cpp +++ llvm/lib/Support/Host.cpp @@ -1035,8 +1035,10 @@ setFeature(X86::FEATURE_FMA); if ((ECX >> 19) & 1) setFeature(X86::FEATURE_SSE4_1); - if ((ECX >> 20) & 1) + if ((ECX >> 20) & 1) { setFeature(X86::FEATURE_SSE4_2); + setFeature(X86::FEATURE_CRC32); + } if ((ECX >> 23) & 1) setFeature(X86::FEATURE_POPCNT); if ((ECX >> 25) & 1) @@ -1482,6 +1484,7 @@ Features["cx16"] = (ECX >> 13) & 1; Features["sse4.1"] = (ECX >> 19) & 1; Features["sse4.2"] = (ECX >> 20) & 1; + Features["crc32"] = Features["sse4.2"]; Features["movbe"] = (ECX >> 22) & 1; Features["popcnt"] = (ECX >> 23) & 1; Features["aes"] = (ECX >> 25) & 1; Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -470,6 +470,7 @@ constexpr FeatureBitset ImpliedFeaturesCMOV = {}; constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {}; constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {}; +constexpr FeatureBitset ImpliedFeaturesCRC32 = {}; constexpr FeatureBitset ImpliedFeaturesENQCMD = {}; constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {}; constexpr FeatureBitset ImpliedFeaturesFXSR = {}; @@ -527,7 +528,7 @@ constexpr FeatureBitset ImpliedFeaturesSSE3 = FeatureSSE2; constexpr FeatureBitset ImpliedFeaturesSSSE3 = FeatureSSE3; constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3; -constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1; +constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1 | FeatureCRC32; constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2; constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX; constexpr FeatureBitset ImpliedFeaturesAVX512F = Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -77,9 +77,11 @@ def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions", [FeatureSSSE3]>; +def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true", + "Enable SSE 4.2 CRC32 instruction">; def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", - [FeatureSSE41]>; + [FeatureSSE41, FeatureCRC32]>; // The MMX subtarget feature is separate from the rest of the SSE features // because it's important (for odd compatibility reasons) to be able to // turn it off explicitly while allowing SSE+ to be on. Index: llvm/lib/Target/X86/X86InstrFormats.td =================================================================== --- llvm/lib/Target/X86/X86InstrFormats.td +++ llvm/lib/Target/X86/X86InstrFormats.td @@ -738,18 +738,19 @@ : I, T8PD, Requires<[UseSSE42]>; -// SS42FI - SSE 4.2 instructions with T8XD prefix. -// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. -class SS42FI o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, T8XD, Requires<[HasSSE42]>; - // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TAPD, Requires<[UseSSE42]>; +// CRC32I - SSE 4.2 CRC32 instructions. +// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly +// controlled by the SSE42 flag. +class CRC32I o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, T8XD, Requires<[HasCRC32]>; + // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -993,6 +993,7 @@ def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">; def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def HasUINTR : Predicate<"Subtarget->hasUINTR()">; +def HasCRC32 : Predicate<"Subtarget->hasCRC32()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, Index: llvm/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/lib/Target/X86/X86InstrSSE.td +++ llvm/lib/Target/X86/X86InstrSSE.td @@ -6572,14 +6572,14 @@ // of r and m. class SS42I_crc32r opc, string asm, RegisterClass RCOut, RegisterClass RCIn, SDPatternOperator Int> : - SS42FI, Sched<[WriteCRC32]>; class SS42I_crc32m opc, string asm, RegisterClass RCOut, X86MemOperand x86memop, SDPatternOperator Int> : - SS42FI, Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; Index: llvm/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/lib/Target/X86/X86Subtarget.h +++ llvm/lib/Target/X86/X86Subtarget.h @@ -425,6 +425,10 @@ /// Processor supports User Level Interrupt instructions bool HasUINTR = false; + /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but + /// function is GPR only) + bool HasCRC32 = false; + /// Processor has a single uop BEXTR implementation. bool HasFastBEXTR = false; @@ -763,6 +767,7 @@ bool hasSERIALIZE() const { return HasSERIALIZE; } bool hasTSXLDTRK() const { return HasTSXLDTRK; } bool hasUINTR() const { return HasUINTR; } + bool hasCRC32() const { return HasCRC32; } bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; } bool useRetpolineIndirectBranches() const { return UseRetpolineIndirectBranches; Index: llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll =================================================================== --- llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll +++ llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll @@ -1,4 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s Index: llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64 + +define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { +; X86-LABEL: crc32_32_8: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_8: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind + +define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { +; X86-LABEL: crc32_32_16: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_16: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind + +define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { +; X86-LABEL: crc32_32_32: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: crc32_32_32: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) + ret i32 %tmp +} +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind Index: llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll =================================================================== --- llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll +++ llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s @@ -25,4 +26,3 @@ %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp } - Index: llvm/test/CodeGen/X86/crc32-target-feature.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/crc32-target-feature.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @test1(i32 %a, i8 %b) nounwind #0 { +; CHECK-LABEL: test1: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test2(i32 %a, i8 %b) nounwind #1 { +; CHECK-LABEL: test2: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test3(i32 %a, i8 %b) nounwind #2 { +; CHECK-LABEL: test3: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test4(i32 %a, i8 %b) nounwind #3 { +; CHECK-LABEL: test4: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @test5(i32 %a, i8 %b) nounwind #4 { +; CHECK-LABEL: test5: +; CHECK: crc32b + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind + +attributes #0 = { "target-features"="+sse,+sse2,+sse4.2" } +attributes #1 = { "target-features"="+crc32" } +attributes #2 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" } +attributes #3 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" } +attributes #4 = { "target-features"="+avx2" }