diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1421,12 +1421,16 @@ } if (Subtarget->hasNEON()) { - // vmin and vmax aren't available in a scalar form, so we use - // a NEON instruction with an undef lane instead. + // vmin and vmax aren't available in a scalar form, so we can use + // a NEON instruction with an undef lane instead. This has a performance + // penalty on some cores, so we don't do this for single precision + // values unless we have been asked to by the core tuning model. + if (Subtarget->useNEONForSinglePrecisionFP()) { + setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); + } setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -665,7 +665,9 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00 ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 -; CHECK-VFP: vmin.f32 +; CHECK-VFP: vcmp.f32 +; CHECK-VFP: vmrs +; CHECK-VFP: vmovlt.f32 ; CHECK-NOVFP: bl __aeabi_fcmpge ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h @@ -683,7 +685,9 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00 ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 -; CHECK-VFP: vmax.f32 +; CHECK-VFP: vcmp.f32 +; CHECK-VFP: vmrs +; CHECK-VFP: vmovhi.f32 ; CHECK-NOVFP: bl __aeabi_fcmple ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: 
bl __aeabi_f2h diff --git a/llvm/test/CodeGen/ARM/lower-vmax.ll b/llvm/test/CodeGen/ARM/lower-vmax.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/lower-vmax.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=arm-eabihf -mattr=+neon < %s | FileCheck -check-prefixes=CHECK,CHECK-NO_NEON %s +; RUN: llc -mtriple=arm-eabihf -mattr=+neon,+neonfp < %s | FileCheck -check-prefixes=CHECK,CHECK-NEON %s + +; Scalar f32 min/max must only be lowered to NEON vmin/vmax when +neonfp is set; +; with plain +neon they are expanded to a VFP compare and conditional move. + +define float @max_f32(float, float) { +;CHECK-LABEL: max_f32: +;CHECK-NEON: vmax.f32 +;CHECK-NO_NEON: vcmp.f32 +;CHECK-NO_NEON: vmrs +;CHECK-NO_NEON: vmovgt.f32 + %3 = call nnan float @llvm.maxnum.f32(float %1, float %0) + ret float %3 +} + +declare float @llvm.maxnum.f32(float, float) + +define float @min_f32(float, float) { +;CHECK-LABEL: min_f32: +;CHECK-NEON: vmin.f32 +;CHECK-NO_NEON: vcmp.f32 +;CHECK-NO_NEON: vmrs +;CHECK-NO_NEON: vmovlt.f32 + %3 = call nnan float @llvm.minnum.f32(float %1, float %0) + ret float %3 +} + +declare float @llvm.minnum.f32(float, float) diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll --- a/llvm/test/CodeGen/ARM/vminmax.ll +++ b/llvm/test/CodeGen/ARM/vminmax.ll @@ -297,14 +297,18 @@ define float @maxnum(float %a, float %b) { ;CHECK-LABEL: maxnum: -;CHECK: vmax.f32 +;CHECK: vcmp.f32 +;CHECK-NEXT: vmrs +;CHECK-NEXT: vmovgt.f32 %r = call nnan float @llvm.maxnum.f32(float %a, float %b) ret float %r } define float @minnum(float %a, float %b) { ;CHECK-LABEL: minnum: -;CHECK: vmin.f32 +;CHECK: vcmp.f32 +;CHECK-NEXT: vmrs +;CHECK-NEXT: vmovlt.f32 %r = call nnan float @llvm.minnum.f32(float %a, float %b) ret float %r }