diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3030,6 +3030,58 @@ updateValueMap(II, ResultReg); return true; } + case Intrinsic::x86_sse42_crc32_32_8: + case Intrinsic::x86_sse42_crc32_32_16: + case Intrinsic::x86_sse42_crc32_32_32: + case Intrinsic::x86_sse42_crc32_64_64: { + if (!Subtarget->hasCRC32()) + return false; + + Type *RetTy = II->getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + unsigned Opc; + const TargetRegisterClass *RC = nullptr; + + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Unexpected intrinsic."); + case Intrinsic::x86_sse42_crc32_32_8: + Opc = X86::CRC32r32r8; + RC = &X86::GR32RegClass; + break; + case Intrinsic::x86_sse42_crc32_32_16: + Opc = X86::CRC32r32r16; + RC = &X86::GR32RegClass; + break; + case Intrinsic::x86_sse42_crc32_32_32: + Opc = X86::CRC32r32r32; + RC = &X86::GR32RegClass; + break; + case Intrinsic::x86_sse42_crc32_64_64: + Opc = X86::CRC32r64r64; + RC = &X86::GR64RegClass; + break; + } + + const Value *LHS = II->getArgOperand(0); + const Value *RHS = II->getArgOperand(1); + + Register LHSReg = getRegForValue(LHS); + Register RHSReg = getRegForValue(RHS); + if (!LHSReg || !RHSReg) + return false; + + Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg); + if (!ResultReg) + return false; + + updateValueMap(II, ResultReg); + return true; + } } } diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll @@ -1,22 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c -define i64 @test_mm_crc64_u8(i64 %a0, i8 %a1) nounwind{ -; CHECK-LABEL: test_mm_crc64_u8: +; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments. +define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc32_u8: ; CHECK: # %bb.0: -; CHECK-NEXT: crc32b %sil, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: crc32b %sil, %eax ; CHECK-NEXT: retq - %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) - ret i64 %res + %trunc = trunc i32 %a1 to i8 + %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) + ret i32 %res +} +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone + +; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments. +define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc32_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: crc32w %si, %eax +; CHECK-NEXT: retq + %trunc = trunc i32 %a1 to i16 + %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) + ret i32 %res +} +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone + +define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc32_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: crc32l %esi, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) + ret i32 %res } -declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone -define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ -; CHECK-LABEL: test_mm_crc64_u64: +define i64 @test_mm_crc32_u64(i64 %a0, i64 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc32_u64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: crc32q %rsi, %rax