diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17232,13 +17232,12 @@ } } - const TargetOptions &Options = DAG.getTarget().Options; - if ((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - if (SDValue SD = reassociateReduction(IsMin ? ISD::VECREDUCE_FMIN - : ISD::VECREDUCE_FMAX, - Opc, SDLoc(N), VT, N0, N1, Flags)) - return SD; + if (SDValue SD = reassociateReduction( + PropagatesNaN + ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM) + : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX), + Opc, SDLoc(N), VT, N0, N1, Flags)) + return SD; return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/double_reduct.ll b/llvm/test/CodeGen/AArch64/double_reduct.ll --- a/llvm/test/CodeGen/AArch64/double_reduct.ll +++ b/llvm/test/CodeGen/AArch64/double_reduct.ll @@ -34,9 +34,8 @@ ; CHECK-LABEL: fmin_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminnmv s2, v2.4s +; CHECK-NEXT: fminnm v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fminnmv s0, v0.4s -; CHECK-NEXT: fminnm s0, s0, s2 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %b) @@ -48,9 +47,8 @@ ; CHECK-LABEL: fmax_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxnmv s2, v2.4s +; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fmaxnmv s0, v0.4s -; CHECK-NEXT: fmaxnm s0, s0, s2 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %b) @@ -62,9 +60,8 @@ ; CHECK-LABEL: fminimum_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminv s2, v2.4s +; CHECK-NEXT: fmin v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fminv s0, v0.4s 
-; CHECK-NEXT: fmin s0, s0, s2 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %b) @@ -76,9 +73,8 @@ ; CHECK-LABEL: fmaximum_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxv s2, v2.4s +; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s ; CHECK-NEXT: fmaxv s0, v0.4s -; CHECK-NEXT: fmax s0, s0, s2 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %b) diff --git a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll --- a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll +++ b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll @@ -28,9 +28,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: fminnmv s2, p0, z2.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: fminnmv s0, p0, z0.s -; CHECK-NEXT: fminnm s0, s0, s2 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %b) @@ -43,9 +43,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: fmaxnmv s2, p0, z2.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: fmaxnmv s0, p0, z0.s -; CHECK-NEXT: fmaxnm s0, s0, s2 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %b) diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll --- a/llvm/test/CodeGen/RISCV/double_reduct.ll +++ b/llvm/test/CodeGen/RISCV/double_reduct.ll @@ -44,15 +44,12 @@ define float @fmin_f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: fmin_f32: ; CHECK: # %bb.0: -; 
CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flw fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa5 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa5, v8 -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: vfmv.f.s fa4, v8 -; CHECK-NEXT: fmin.s fa0, fa5, fa4 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %b) @@ -63,15 +60,12 @@ define float @fmax_f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: fmax_f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: lui a0, 1047552 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa5 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa5, v8 -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: vfmv.f.s fa4, v8 -; CHECK-NEXT: fmax.s fa0, fa5, fa4 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %b) diff --git a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll --- a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll @@ -35,13 +35,10 @@ ; CHECK-LABEL: fmin_f32: ; CHECK: @ %bb.0: ; CHECK-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-NEXT: vminnm.f32 s4, s8, s9 +; CHECK-NEXT: vminnm.f32 q0, q0, q2 ; CHECK-NEXT: vminnm.f32 s2, s2, s3 ; CHECK-NEXT: vminnm.f32 s0, s0, s1 ; CHECK-NEXT: vminnm.f32 s0, s0, s2 -; CHECK-NEXT: vminnm.f32 s2, s10, s11 -; CHECK-NEXT: vminnm.f32 s2, s4, s2 -; CHECK-NEXT: vminnm.f32 s0, s0, s2 ; CHECK-NEXT: bx lr %r1 = call fast 
float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %b) @@ -53,13 +50,10 @@ ; CHECK-LABEL: fmax_f32: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 -; CHECK-NEXT: vmaxnm.f32 s4, s8, s9 +; CHECK-NEXT: vmaxnm.f32 q0, q0, q2 ; CHECK-NEXT: vmaxnm.f32 s2, s2, s3 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s10, s11 -; CHECK-NEXT: vmaxnm.f32 s2, s4, s2 -; CHECK-NEXT: vmaxnm.f32 s0, s0, s2 ; CHECK-NEXT: bx lr %r1 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %b)