diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1563,8 +1563,10 @@
       setOperationAction(ISD::FP_EXTEND, VT, Custom);
       setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
     }
-    for (unsigned Opc : { ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV })
+    for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
       setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
+      setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
+    }
 
     setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
@@ -1875,8 +1877,10 @@
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
     setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
-    for (unsigned Opc : { ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV })
+    for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
       setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
+      setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
+    }
 
     for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
       setOperationAction(ISD::MLOAD, VT, Legal);
diff --git a/llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll b/llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=f16c| FileCheck %s --check-prefixes=CHECK
+
+define <16 x half> @foo(<16 x half> %a, <16 x half> %b) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: andq $-32, %rsp
+; CHECK-NEXT: subq $96, %rsp
+; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovaps %ymm0, (%rsp)
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: vcvtph2ps (%rsp), %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm1
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm2
+; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; CHECK-NEXT: vcvtps2ph $4, %ymm1, %xmm1
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: movq %rbp, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+  %1 = fadd <16 x half> %a, %b
+  ret <16 x half> %1
+}
diff --git a/llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll b/llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f| FileCheck %s --check-prefixes=CHECK
+
+define <32 x half> @foo(<32 x half> %a, <32 x half> %b) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: andq $-64, %rsp
+; CHECK-NEXT: subq $192, %rsp
+; CHECK-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovaps %zmm0, (%rsp)
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm0
+; CHECK-NEXT: vcvtph2ps (%rsp), %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vcvtps2ph $4, %zmm0, %ymm0
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm1
+; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm2
+; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
+; CHECK-NEXT: vcvtps2ph $4, %zmm1, %ymm1
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: movq %rbp, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+  %1 = fadd <32 x half> %a, %b
+  ret <32 x half> %1
+}