AArch64: expand f16 VECREDUCE_FMAX / VECREDUCE_FMIN without full FP16.

The custom lowering of VECREDUCE_FMAX and VECREDUCE_FMIN now returns an
empty SDValue when the source vector has f16 elements and
Subtarget->hasFullFP16() is false, instead of emitting the
aarch64_neon_fmaxnmv / aarch64_neon_fminnmv intrinsic.

Both legalization tests gain v4f16 cases. Each test is run twice: with
-mattr=+neon (prefix CHECK) and with -mattr=+neon,+fullfp16 (prefix FP16).

NOTE(review): the first hunk of vecreduce-fmin-legalization.ll adds the
FP16 RUN line; without it the FP16-* assertions in that file are never
evaluated. Its context line is assumed to match the existing RUN line of
the fmax test (the hunk offsets of both files line up) -- confirm against
the tree before applying.

Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10483,12 +10483,20 @@
   case ISD::VECREDUCE_UMIN:
     return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
   case ISD::VECREDUCE_FMAX: {
+    // Expand the reduction if the CPU cannot handle it.
+    if (SrcVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16())
+      return SDValue();
+
     return DAG.getNode(
         ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
         DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
         Src);
   }
   case ISD::VECREDUCE_FMIN: {
+    // Expand the reduction if the CPU cannot handle it.
+    if (SrcVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16())
+      return SDValue();
+
     return DAG.getNode(
         ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
         DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Index: llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=FP16
 
 declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
 declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
 
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
 declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
@@ -14,6 +16,10 @@
 ; CHECK-LABEL: test_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v1f16:
+; FP16:       // %bb.0:
+; FP16-NEXT:    ret
   %b = call nnan half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
   ret half %b
 }
@@ -24,6 +30,12 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v1f32:
+; FP16:       // %bb.0:
+; FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; FP16-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; FP16-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -32,6 +44,10 @@
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v1f64:
+; FP16:       // %bb.0:
+; FP16-NEXT:    ret
   %b = call nnan double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
   ret double %b
 }
@@ -40,10 +56,72 @@
 ; CHECK-LABEL: test_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v1f128:
+; FP16:       // %bb.0:
+; FP16-NEXT:    ret
   %b = call nnan fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
   ret fp128 %b
 }
 
+define half @test_v4f16(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h1, v0.h[3]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s3, h3
+; CHECK-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s2, h2
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v4f16:
+; FP16:       // %bb.0:
+; FP16-NEXT:    fmaxnmv h0, v0.4h
+; FP16-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
+  ret half %b
+}
+
+define half @test_v4f16_ninf(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_v4f16_ninf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h1, v0.h[3]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s3, h3
+; CHECK-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s2, h2
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v4f16_ninf:
+; FP16:       // %bb.0:
+; FP16-NEXT:    fmaxnmv h0, v0.4h
+; FP16-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:
@@ -52,6 +130,14 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v3f32:
+; FP16:       // %bb.0:
+; FP16-NEXT:    mov w8, #-8388608
+; FP16-NEXT:    fmov s1, w8
+; FP16-NEXT:    mov v0.s[3], v1.s[0]
+; FP16-NEXT:    fmaxnmv s0, v0.4s
+; FP16-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
   ret float %b
 }
@@ -64,6 +150,14 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v3f32_ninf:
+; FP16:       // %bb.0:
+; FP16-NEXT:    mov w8, #-8388609
+; FP16-NEXT:    fmov s1, w8
+; FP16-NEXT:    mov v0.s[3], v1.s[0]
+; FP16-NEXT:    fmaxnmv s0, v0.4s
+; FP16-NEXT:    ret
   %b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
   ret float %b
 }
@@ -72,6 +166,10 @@
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    b fmaxl
+;
+; FP16-LABEL: test_v2f128:
+; FP16:       // %bb.0:
+; FP16-NEXT:    b fmaxl
   %b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
@@ -84,6 +182,14 @@
 ; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v16f32:
+; FP16:       // %bb.0:
+; FP16-NEXT:    fmaxnm v1.4s, v1.4s, v3.4s
+; FP16-NEXT:    fmaxnm v0.4s, v0.4s, v2.4s
+; FP16-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; FP16-NEXT:    fmaxnmv s0, v0.4s
+; FP16-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
   ret float %b
 }
Index: llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -2,2 +2,3 @@
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=FP16
 
@@ -6,6 +7,7 @@
 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
 
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
 declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
@@ -44,6 +46,64 @@
   ret fp128 %b
 }
 
+define half @test_v4f16(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h1, v0.h[3]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s3, h3
+; CHECK-NEXT:    fminnm s0, s0, s3
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s2, h2
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fminnm s0, s0, s2
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v4f16:
+; FP16:       // %bb.0:
+; FP16-NEXT:    fminnmv h0, v0.4h
+; FP16-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
+  ret half %b
+}
+
+define half @test_v4f16_ninf(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_v4f16_ninf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h1, v0.h[3]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s3, h3
+; CHECK-NEXT:    fminnm s0, s0, s3
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s2, h2
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fminnm s0, s0, s2
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+;
+; FP16-LABEL: test_v4f16_ninf:
+; FP16:       // %bb.0:
+; FP16-NEXT:    fminnmv h0, v0.4h
+; FP16-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0: