diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1420,12 +1420,16 @@ } if (Subtarget->hasNEON()) { - // vmin and vmax aren't available in a scalar form, so we use - // a NEON instruction with an undef lane instead. - setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); + // vmin and vmax aren't available in a scalar form, so we can use + // a NEON instruction with an undef lane instead. This has a performance + // penalty on some cores, so we don't do this unless we have been + // asked to by the core tuning model. + if (Subtarget->useNEONForSinglePrecisionFP()) { + setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); + } setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -446,7 +446,9 @@ ; CHECK-LABEL: test_minimum: ; CHECK: vldr.16 s2, [r0] ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00 -; CHECK-NEXT: vmin.f16 d0, d1, d0 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: bx lr %a = load half, half* %p, align 2 @@ -460,7 +462,9 @@ ; CHECK-LABEL: test_maximum: ; CHECK: vldr.16 s2, [r0] ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00 -; CHECK-NEXT: vmax.f16 d0, d1, d0 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: bx lr %a = load half, half* %p, align 2 diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -665,7 +665,9 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00 ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 -; CHECK-VFP: vmin.f32 +; CHECK-VFP: vcmp.f32 +; CHECK-VFP: vmrs +; CHECK-VFP: vmovlt.f32 ; CHECK-NOVFP: bl __aeabi_fcmpge ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h @@ -683,7 +685,9 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00 ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 -; CHECK-VFP: vmax.f32 +; CHECK-VFP: vcmp.f32 +; CHECK-VFP: vmrs +; CHECK-VFP: vmovhi.f32 ; CHECK-NOVFP: bl __aeabi_fcmple ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll --- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll +++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=armv8-eabi -mattr=+fullfp16 | FileCheck %s ; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 | FileCheck %s @@ -9,7 +10,14 @@ define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_o: -; CHECK-NOT: vminnm.f16 +; CHECK: @ %bb.0: @ 
%entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -20,7 +28,14 @@ define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_o_rev: -; CHECK-NOT: vminnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -31,7 +46,14 @@ define half @fp16_vminnm_u(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_u: -; CHECK-NOT: vminnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -42,7 +64,14 @@ define half @fp16_vminnm_ule(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_ule: -; CHECK-NOT: vminnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -53,7 +82,14 @@ define half @fp16_vminnm_u_rev(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_u_rev: -; CHECK-NOT: vminnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -64,7 +100,14 @@ define half @fp16_vmaxnm_o(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_o: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -75,7 +118,14 @@ define half @fp16_vmaxnm_oge(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_oge: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -86,7 +136,14 @@ define half @fp16_vmaxnm_o_rev(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_o_rev: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -97,7 +154,14 @@ define half @fp16_vmaxnm_ole_rev(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: 
fp16_vmaxnm_ole_rev: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -108,7 +172,14 @@ define half @fp16_vmaxnm_u(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_u: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -119,7 +190,14 @@ define half @fp16_vmaxnm_uge(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_uge: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r2 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -130,7 +208,14 @@ define half @fp16_vmaxnm_u_rev(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: fp16_vmaxnm_u_rev: -; CHECK-NOT: vmaxnm.f16 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r2 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %0 = bitcast i16 %a to half %1 = bitcast i16 %b to half @@ -142,12 +227,21 @@ ; known non-NaNs define half @fp16_vminnm_NNNo(i16 signext %a) { -; CHECK-LABEL: fp16_vminnm_NNNo: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01 -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vminnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmin.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vminnm_NNNo: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 +; CHECK-NEXT: vminnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI12_0 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI12_0: +; CHECK-NEXT: .short 0x5040 @ half 34 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp olt half %0, 12. @@ -158,12 +252,23 @@ } define half @fp16_vminnm_NNNo_rev(i16 signext %a) { -; CHECK-LABEL: fp16_vminnm_NNNo_rev: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vmin.f16 d0, d1, d0 -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vminnm.f16 s0, [[S0]], [[S2]] +; CHECK-LABEL: fp16_vminnm_NNNo_rev: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI13_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vldr.16 s2, .LCPI13_1 +; CHECK-NEXT: vminnm.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI13_0: +; CHECK-NEXT: .short 0x5300 @ half 56 +; CHECK-NEXT: .LCPI13_1: +; CHECK-NEXT: .short 0x54e0 @ half 78 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp ogt half %0, 56. 
@@ -175,11 +280,20 @@ define half @fp16_vminnm_NNNu(i16 signext %b) { ; CHECK-LABEL: fp16_vminnm_NNNu: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01 -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vminnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmin.f16 d0, d1, d0 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 +; CHECK-NEXT: vminnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI14_0 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI14_0: +; CHECK-NEXT: .short 0x5040 @ half 34 entry: %0 = bitcast i16 %b to half %cmp1 = fcmp ult half 12., %0 @@ -190,12 +304,23 @@ } define half @fp16_vminnm_NNNule(i16 signext %b) { -; CHECK-LABEL: fp16_vminnm_NNNule: -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vminnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmin.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vminnm_NNNule: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI15_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vminnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI15_1 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI15_0: +; CHECK-NEXT: .short 0x5040 @ half 34 +; CHECK-NEXT: .LCPI15_1: +; CHECK-NEXT: .short 0x5300 @ half 56 entry: %0 = bitcast i16 %b to half @@ -207,13 +332,24 @@ } define half @fp16_vminnm_NNNu_rev(i16 signext %b) { -; CHECK-LABEL: fp16_vminnm_NNNu_rev: +; CHECK-LABEL: fp16_vminnm_NNNu_rev: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI16_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vldr.16 s2, .LCPI16_1 +; CHECK-NEXT: vminnm.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI16_0: +; CHECK-NEXT: .short 0x5300 @ half 56 +; CHECK-NEXT: .LCPI16_1: +; CHECK-NEXT: .short 0x54e0 @ half 78 -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vmin.f16 d0, d1, d0 -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vminnm.f16 s0, [[S0]], [[S2]] entry: %0 = bitcast i16 %b to half @@ -225,12 +361,21 @@ } define half @fp16_vmaxnm_NNNo(i16 signext %a) { -; CHECK-LABEL: fp16_vmaxnm_NNNo: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01 -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vmaxnm_NNNo: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI17_0 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI17_0: +; CHECK-NEXT: .short 0x5040 @ half 34 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp ogt half %0, 12. 
@@ -241,12 +386,23 @@ } define half @fp16_vmaxnm_NNNoge(i16 signext %a) { -; CHECK-LABEL: fp16_vmaxnm_NNNoge: -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vmaxnm_NNNoge: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI18_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI18_1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .short 0x5040 @ half 34 +; CHECK-NEXT: .LCPI18_1: +; CHECK-NEXT: .short 0x5300 @ half 56 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp oge half %0, 34. @@ -257,12 +413,23 @@ } define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) { -; CHECK-LABEL: fp16_vmaxnm_NNNo_rev: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vmax.f16 d0, d1, d0 -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]] +; CHECK-LABEL: fp16_vmaxnm_NNNo_rev: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI19_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vldr.16 s2, .LCPI19_1 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .short 0x5300 @ half 56 +; CHECK-NEXT: .LCPI19_1: +; CHECK-NEXT: .short 0x54e0 @ half 78 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp olt half %0, 56. @@ -273,12 +440,23 @@ } define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) { -; CHECK-LABEL: fp16_vmaxnm_NNNole_rev: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vmax.f16 d0, d1, d0 -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]] +; CHECK-LABEL: fp16_vmaxnm_NNNole_rev: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI20_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vldr.16 s2, .LCPI20_1 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI20_0: +; CHECK-NEXT: .short 0x54e0 @ half 78 +; CHECK-NEXT: .LCPI20_1: +; CHECK-NEXT: .short 0x55a0 @ half 90 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp ole half %0, 78. 
@@ -289,12 +467,21 @@ } define half @fp16_vmaxnm_NNNu(i16 signext %b) { -; CHECK-LABEL: fp16_vmaxnm_NNNu: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], #1.200000e+01 -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vmaxnm_NNNu: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, #1.200000e+01 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI21_0 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI21_0: +; CHECK-NEXT: .short 0x5040 @ half 34 entry: %0 = bitcast i16 %b to half %cmp1 = fcmp ugt half 12., %0 @@ -305,12 +492,23 @@ } define half @fp16_vmaxnm_NNNuge(i16 signext %b) { -; CHECK-LABEL: fp16_vmaxnm_NNNuge: -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S4:s[0-9]]], r{{.}} -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmaxnm.f16 s2, [[S4]], [[S2]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vmaxnm_NNNuge: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s2, .LCPI22_0 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vldr.16 s2, .LCPI22_1 +; CHECK-NEXT: vcmp.f16 s2, s0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI22_0: +; CHECK-NEXT: .short 0x5040 @ half 34 +; CHECK-NEXT: .LCPI22_1: +; CHECK-NEXT: .short 0x5300 @ half 56 entry: %0 = bitcast i16 %b to half %cmp1 = fcmp uge half 34., %0 @@ -321,11 +519,20 @@ } define half @fp16_vminmaxnm_neg0(i16 signext %a) { -; CHECK-LABEL: fp16_vminmaxnm_neg0: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vminnm.f16 s2, [[S2]], [[S0]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vminmaxnm_neg0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s0, .LCPI23_0 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vminnm.f16 s2, s2, s0 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI23_0: +; CHECK-NEXT: .short 0x8000 @ half -0 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp olt half %0, -0. 
@@ -336,11 +543,20 @@ } define half @fp16_vminmaxnm_e_0(i16 signext %a) { -; CHECK-LABEL: fp16_vminmaxnm_e_0: -; CHECK: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S0:s[0-9]]], r{{.}} -; CHECK: vmin.f16 d0, d0, d1 -; CHECK: vmaxnm.f16 s0, [[S0]], [[S2]] +; CHECK-LABEL: fp16_vminmaxnm_e_0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vldr.16 s2, .LCPI24_0 +; CHECK-NEXT: vcmp.f16 s0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vmaxnm.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI24_0: +; CHECK-NEXT: .short 0x0000 @ half 0 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp nsz ole half 0., %0 @@ -351,11 +567,20 @@ } define half @fp16_vminmaxnm_e_neg0(i16 signext %a) { -; CHECK-LABEL: fp16_vminmaxnm_e_neg0: -; CHECK: vldr.16 [[S0:s[0-9]]], .LCPI{{.*}} -; CHECK: vmov.f16 [[S2:s[0-9]]], r{{.}} -; CHECK: vminnm.f16 s2, [[S2]], [[S0]] -; CHECK: vmax.f16 d0, d1, d0 +; CHECK-LABEL: fp16_vminmaxnm_e_neg0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr.16 s0, .LCPI25_0 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vminnm.f16 s2, s2, s0 +; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI25_0: +; CHECK-NEXT: .short 0x8000 @ half -0 entry: %0 = bitcast i16 %a to half %cmp1 = fcmp nsz ule half -0., %0 diff --git a/llvm/test/CodeGen/ARM/lower-vmax.ll b/llvm/test/CodeGen/ARM/lower-vmax.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/lower-vmax.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=arm-eabihf -mattr=+neon < %s | FileCheck -check-prefixes=CHECK-NO_NEON %s +; RUN: llc -mtriple=arm-eabihf -mattr=+neon,+neonfp < %s | FileCheck -check-prefixes=CHECK-NEON %s + +define float @max_f32(float, float) { +;CHECK-NEON: vmax.f32 +;CHECK-NO_NEON: vcmp.f32 +;CHECK-NO_NEON: vmrs +;CHECK-NO_NEON: vmovgt.f32 + %3 = call nnan float @llvm.maxnum.f32(float %1, float %0) + ret float %3 +} + +declare float @llvm.maxnum.f32(float, float) #1 + +define float @min_f32(float, float) { +;CHECK-NEON: vmin.f32 +;CHECK-NO_NEON: vcmp.f32 +;CHECK-NO_NEON: vmrs +;CHECK-NO_NEON: vmovlt.f32 + %3 = call nnan float @llvm.minnum.f32(float %1, float %0) + ret float %3 +} + +declare float @llvm.minnum.f32(float, float) #1 + diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll --- a/llvm/test/CodeGen/ARM/vminmax.ll +++ b/llvm/test/CodeGen/ARM/vminmax.ll @@ -297,14 +297,18 @@ define float @maxnum(float %a, float %b) { ;CHECK-LABEL: maxnum: -;CHECK: vmax.f32 +;CHECK: vcmp.f32 +;CHECK-NEXT: vmrs +;CHECK-NEXT: vmovgt.f32 %r = call nnan float @llvm.maxnum.f32(float %a, float %b) ret float %r } define float @minnum(float %a, float %b) { ;CHECK-LABEL: minnum: -;CHECK: vmin.f32 +;CHECK: vcmp.f32 +;CHECK-NEXT: vmrs +;CHECK-NEXT: vmovlt.f32 %r = call nnan float @llvm.minnum.f32(float %a, float %b) ret float %r }
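
Note for reviewers: a minimal sketch of how to see the behavioral switch locally is below. The file and function names are illustrative, and it assumes an llc built with this patch; the target flags and the nnan-minnum pattern are exactly the ones the new lower-vmax.ll test above exercises, so this adds nothing beyond a reproducer.

; repro.ll - hypothetical reproducer, mirrors lower-vmax.ll above.
; Without +neonfp, the scalar case should now select a VFP compare plus
; predicated move (vcmp.f32 / vmrs / vmovlt.f32):
;   llc -mtriple=arm-eabihf -mattr=+neon repro.ll -o -
; With +neonfp (the core tuning model opting into NEON for
; single-precision FP), the previous vmin.f32-with-undef-lane
; lowering is retained:
;   llc -mtriple=arm-eabihf -mattr=+neon,+neonfp repro.ll -o -
define float @repro_min(float %a, float %b) {
  %r = call nnan float @llvm.minnum.f32(float %a, float %b)
  ret float %r
}
declare float @llvm.minnum.f32(float, float)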