# Patch overview (review notes; everything from the first "diff --git" line on
# is the patch itself, token-for-token).
#
# llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
#   Hunk 1 (@@ -4026 @@): adds `case ISD::FMINIMUM:` in front of the existing
#     FMINNUM / STRICT_FMINNUM labels, so it shares their ExpandFPLibCall call
#     with the RTLIB::FMIN_* entries. `case ISD::FMAXIMUM:` is added the same
#     way in front of FMAXNUM / STRICT_FMAXNUM (RTLIB::FMAX_* entries).
#   Hunk 2 (@@ -4788 @@): adds FMINIMUM and FMAXIMUM to the case list that
#     already holds FREM, FMINNUM, FMAXNUM and FPOW. The visible part of that
#     shared body FP_EXTENDs both operands to NVT; the rest of the body lies
#     outside this hunk.
#
# llvm/test/CodeGen/AArch64/fmaximum-legalization.ll (new file, 65 lines)
#   fmaximum_v4f16: the CHECK lines expect each half lane to be converted to
#     single precision (fcvt), combined with a scalar `fmax`, and converted
#     back to half. The test is tagged "Fixes PR63267".
#   fmaximum_v2f128: the CHECK lines expect two `bl fmaxl` calls, one per
#     fp128 element.
#
# NOTE(review): hunk 1 sends FMINIMUM/FMAXIMUM to the same libcalls as
#   FMINNUM/FMAXNUM, and the fp128 test locks in `fmaxl`. C fmin/fmax and
#   llvm.minimum/llvm.maximum may disagree when one operand is NaN or when
#   comparing -0.0 with +0.0 -- confirm against the LangRef / ISDOpcodes.h
#   before relying on this lowering for NaN-sensitive code.
# NOTE(review): the leading indentation of context lines below was rebuilt
#   from LLVM formatting conventions because the original spacing was lost;
#   verify against the upstream file before applying.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4026,12 +4026,14 @@
     Results.push_back(CallResult.second);
     break;
   }
+  case ISD::FMINIMUM:
   case ISD::FMINNUM:
   case ISD::STRICT_FMINNUM:
     ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
                     RTLIB::FMIN_F80, RTLIB::FMIN_F128,
                     RTLIB::FMIN_PPCF128, Results);
     break;
+  case ISD::FMAXIMUM:
   case ISD::FMAXNUM:
   case ISD::STRICT_FMAXNUM:
     ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
@@ -4788,6 +4790,8 @@
   case ISD::FREM:
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
+  case ISD::FMINIMUM:
+  case ISD::FMAXIMUM:
   case ISD::FPOW:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+
+declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
+
+declare <2 x fp128> @llvm.maximum.v2f128(<2 x fp128>, <2 x fp128>)
+
+; Fixes PR63267
+define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
+; CHECK-LABEL: fmaximum_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h2, v1.h[1]
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    fcvt s4, h1
+; CHECK-NEXT:    fcvt s5, h0
+; CHECK-NEXT:    mov h6, v1.h[2]
+; CHECK-NEXT:    mov h7, v0.h[2]
+; CHECK-NEXT:    mov h1, v1.h[3]
+; CHECK-NEXT:    fcvt s2, h2
+; CHECK-NEXT:    fcvt s3, h3
+; CHECK-NEXT:    fmax s4, s5, s4
+; CHECK-NEXT:    fcvt s5, h7
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fmax s2, s3, s2
+; CHECK-NEXT:    fcvt s3, h6
+; CHECK-NEXT:    mov h6, v0.h[3]
+; CHECK-NEXT:    fcvt h0, s4
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fmax s3, s5, s3
+; CHECK-NEXT:    fcvt s4, h6
+; CHECK-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-NEXT:    fcvt h2, s3
+; CHECK-NEXT:    fmax s1, s4, s1
+; CHECK-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %x, <4 x half> %y)
+  ret <4 x half> %r
+}
+
+define <2 x fp128> @fmaximum_v2f128(<2 x fp128> %x, <2 x fp128> %y) {
+; CHECK-LABEL: fmaximum_v2f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    mov v1.16b, v2.16b
+; CHECK-NEXT:    bl fmaxl
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT:    bl fmaxl
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+  %r = call <2 x fp128> @llvm.maximum.v2f128(<2 x fp128> %x, <2 x fp128> %y)
+  ret <2 x fp128> %r
+}