diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1951,6 +1951,9 @@ setOperationAction(ISD::SETCC, MVT::f16, Custom); setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); + setOperationAction(ISD::FROUND, MVT::f16, Custom); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote); setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll --- a/llvm/test/CodeGen/X86/fp-round.ll +++ b/llvm/test/CodeGen/X86/fp-round.ll @@ -3,6 +3,63 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=AVX512FP16 + +define half @round_f16(half %h) { +; SSE2-LABEL: round_f16: +; SSE2: ## %bb.0: +; SSE2-NEXT: pushq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: movzwl %di, %edi +; SSE2-NEXT: callq ___extendhfsf2 +; SSE2-NEXT: callq _roundf +; SSE2-NEXT: callq ___truncsfhf2 +; SSE2-NEXT: popq %rcx +; SSE2-NEXT: retq +; +; SSE41-LABEL: round_f16: +; SSE41: ## %bb.0: +; SSE41-NEXT: pushq %rax +; SSE41-NEXT: .cfi_def_cfa_offset 16 +; SSE41-NEXT: movzwl %di, %edi +; SSE41-NEXT: callq ___extendhfsf2 +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; SSE41-NEXT: andps %xmm0, %xmm1 +; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: addss 
%xmm0, %xmm1 +; SSE41-NEXT: xorps %xmm0, %xmm0 +; SSE41-NEXT: roundss $11, %xmm1, %xmm0 +; SSE41-NEXT: callq ___truncsfhf2 +; SSE41-NEXT: popq %rcx +; SSE41-NEXT: retq +; +; AVX1-LABEL: round_f16: +; AVX1: ## %bb.0: +; AVX1-NEXT: pushq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: movzwl %di, %edi +; AVX1-NEXT: callq ___extendhfsf2 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] +; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: callq ___truncsfhf2 +; AVX1-NEXT: popq %rcx +; AVX1-NEXT: retq +; +; AVX512FP16-LABEL: round_f16: +; AVX512FP16: # %bb.0: +; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1] +; AVX512FP16-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2 +; AVX512FP16-NEXT: vaddsh %xmm2, %xmm0, %xmm0 +; AVX512FP16-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0 +; AVX512FP16-NEXT: retq +entry: + %a = call half @llvm.round.f16(half %h) + ret half %a +} define float @round_f32(float %x) { ; SSE2-LABEL: round_f32: @@ -561,6 +618,7 @@ ret <8 x double> %a } +declare half @llvm.round.f16(half) declare float @llvm.round.f32(float) declare double @llvm.round.f64(double) declare <4 x float> @llvm.round.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/X86/fp-roundeven.ll b/llvm/test/CodeGen/X86/fp-roundeven.ll --- a/llvm/test/CodeGen/X86/fp-roundeven.ll +++ b/llvm/test/CodeGen/X86/fp-roundeven.ll @@ -3,6 +3,52 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | 
FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=AVX512FP16 + +define half @roundeven_f16(half %h) { +; SSE2-LABEL: roundeven_f16: +; SSE2: ## %bb.0: +; SSE2-NEXT: pushq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: movzwl %di, %edi +; SSE2-NEXT: callq ___extendhfsf2 +; SSE2-NEXT: callq _roundevenf +; SSE2-NEXT: callq ___truncsfhf2 +; SSE2-NEXT: popq %rcx +; SSE2-NEXT: retq +; +; SSE41-LABEL: roundeven_f16: +; SSE41: ## %bb.0: +; SSE41-NEXT: pushq %rax +; SSE41-NEXT: .cfi_def_cfa_offset 16 +; SSE41-NEXT: movzwl %di, %edi +; SSE41-NEXT: callq ___extendhfsf2 +; SSE41-NEXT: roundss $8, %xmm0, %xmm0 +; SSE41-NEXT: callq ___truncsfhf2 +; SSE41-NEXT: popq %rcx +; SSE41-NEXT: retq +; +; AVX1-LABEL: roundeven_f16: +; AVX1: ## %bb.0: +; AVX1-NEXT: pushq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: movzwl %di, %edi +; AVX1-NEXT: callq ___extendhfsf2 +; AVX1-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: callq ___truncsfhf2 +; AVX1-NEXT: popq %rcx +; AVX1-NEXT: retq +; +; AVX512FP16-LABEL: roundeven_f16: +; AVX512FP16: # %bb.0: +; AVX512FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 +; AVX512FP16-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0 +; AVX512FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 +; AVX512FP16-NEXT: retq +entry: + %a = call half @llvm.roundeven.f16(half %h) + ret half %a +} define float @roundeven_f32(float %x) { ; SSE2-LABEL: roundeven_f32: @@ -408,6 +454,7 @@ ret <8 x double> %a } +declare half @llvm.roundeven.f16(half) declare float @llvm.roundeven.f32(float) declare double @llvm.roundeven.f64(double) declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)