Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -474,6 +474,8 @@ def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>; def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>; def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>; +def vecreduce_fminimum : SDNode<"ISD::VECREDUCE_FMINIMUM", SDTFPVecReduce>; +def vecreduce_fmaximum : SDNode<"ISD::VECREDUCE_FMAXIMUM", SDTFPVecReduce>; def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1160,6 +1160,8 @@ if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { setOperationAction(ISD::VECREDUCE_FMAX, VT, Legal); setOperationAction(ISD::VECREDUCE_FMIN, VT, Legal); + setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Legal); + setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Legal); setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -787,6 +787,12 @@ def AArch64fminnmv : PatFrags<(ops node:$Rn), [(vecreduce_fmin node:$Rn), (int_aarch64_neon_fminnmv node:$Rn)]>; +def AArch64fmaxv : PatFrags<(ops node:$Rn), + [(vecreduce_fmaximum node:$Rn), + (int_aarch64_neon_fmaxv node:$Rn)]>; +def AArch64fminv : PatFrags<(ops node:$Rn), + [(vecreduce_fminimum node:$Rn), + (int_aarch64_neon_fminv node:$Rn)]>; def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -5823,17 +5829,17 @@ (FMAXNMPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), +def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), +def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>; def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), +def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), +def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))), (FMINPv2i64p V128:$Rn)>; //---------------------------------------------------------------------------- @@ -6298,9 +6304,9 @@ defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>; -defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>; defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>; -defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>; multiclass SIMDAcrossLaneLongPairIntrinsic { // Patterns for addv(addlp(x)) ==> addlv Index: llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll @@ -71,13 +71,7 @@ ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: -; CHECK-FP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-NEXT: mov h1, v0.h[1] -; CHECK-FP-NEXT: mov h2, v0.h[2] -; CHECK-FP-NEXT: fmax h1, h0, h1 -; CHECK-FP-NEXT: mov h0, v0.h[3] -; CHECK-FP-NEXT: fmax h1, h1, h2 -; CHECK-FP-NEXT: fmax h0, h1, h0 +; CHECK-FP-NEXT: fmaxv h0, v0.4h ; CHECK-FP-NEXT: ret %b = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) ret half %b @@ -156,14 +150,7 @@ ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmax v0.8h, v0.8h, v1.8h -; CHECK-FP-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP-NEXT: fmax v0.4h, v0.4h, v1.4h -; CHECK-FP-NEXT: mov h1, v0.h[1] -; CHECK-FP-NEXT: mov h2, v0.h[2] -; CHECK-FP-NEXT: fmax h1, h0, h1 -; CHECK-FP-NEXT: mov h0, v0.h[3] -; CHECK-FP-NEXT: fmax h1, h1, h2 -; CHECK-FP-NEXT: fmax h0, h1, h0 +; CHECK-FP-NEXT: fmaxv h0, v0.8h ; CHECK-FP-NEXT: ret %b = call half @llvm.vector.reduce.fmaximum.v11f16(<11 x half> %a) ret half %b @@ -177,10 +164,7 @@ ; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmax s0, s0, s1 +; CHECK-NEXT: fmaxv s0, v0.4s ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a) ret float %b @@ -193,10 +177,7 @@ ; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmax s0, s0, s1 +; CHECK-NEXT: fmaxv s0, v0.4s ; CHECK-NEXT: ret %b = call ninf float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a) ret float %b @@ -215,10 +196,7 @@ ; CHECK-NEXT: fmax v1.4s, v1.4s, v3.4s ; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmax s0, s0, s1 +; CHECK-NEXT: fmaxv s0, v0.4s ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a) ret float %b @@ -227,8 +205,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-LABEL: test_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmax d0, d0, d1 +; CHECK-NEXT: fmaxp d0, v0.2d ; CHECK-NEXT: ret %b = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a) ret double %b Index: llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll @@ -71,13 +71,7 @@ ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: -; CHECK-FP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP-NEXT: mov h1, v0.h[1] -; CHECK-FP-NEXT: mov h2, v0.h[2] -; CHECK-FP-NEXT: fmin h1, h0, h1 -; CHECK-FP-NEXT: mov h0, v0.h[3] -; CHECK-FP-NEXT: fmin h1, h1, h2 -; CHECK-FP-NEXT: fmin h0, h1, h0 +; CHECK-FP-NEXT: fminv h0, v0.4h ; CHECK-FP-NEXT: ret %b = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a) ret half %b @@ -156,14 +150,7 @@ ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmin v0.8h, v0.8h, v1.8h -; CHECK-FP-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP-NEXT: fmin v0.4h, v0.4h, v1.4h -; CHECK-FP-NEXT: mov h1, v0.h[1] -; CHECK-FP-NEXT: mov h2, v0.h[2] -; CHECK-FP-NEXT: fmin h1, h0, h1 -; CHECK-FP-NEXT: mov h0, v0.h[3] -; CHECK-FP-NEXT: fmin h1, h1, h2 -; CHECK-FP-NEXT: fmin h0, h1, h0 +; CHECK-FP-NEXT: fminv h0, v0.8h ; CHECK-FP-NEXT: ret %b = call half @llvm.vector.reduce.fminimum.v11f16(<11 x half> %a) ret half %b @@ -177,10 +164,7 @@ ; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmin s0, s0, s1 +; CHECK-NEXT: fminv s0, v0.4s ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fminimum.v3f32(<3 x float> %a) ret float %b @@ -193,10 +177,7 @@ ; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmin s0, s0, s1 +; CHECK-NEXT: fminv s0, v0.4s ; CHECK-NEXT: ret %b = call ninf float @llvm.vector.reduce.fminimum.v3f32(<3 x float> %a) ret float %b @@ -215,10 +196,7 @@ ; CHECK-NEXT: fmin v1.4s, v1.4s, v3.4s ; CHECK-NEXT: fmin v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: fmin s0, s0, s1 +; CHECK-NEXT: fminv s0, v0.4s ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %a) ret float %b @@ -227,8 +205,7 @@ define double @test_v2f64(<2 x double> %a) nounwind { ; CHECK-LABEL: test_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fmin d0, d0, d1 +; CHECK-NEXT: fminp d0, v0.2d ; CHECK-NEXT: ret %b = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a) ret double %b