Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -503,6 +503,8 @@
   case AMDGPUISD::RCP_LEGACY:
   case AMDGPUISD::SIN_HW:
   case AMDGPUISD::FMUL_LEGACY:
+  case AMDGPUISD::FMIN_LEGACY:
+  case AMDGPUISD::FMAX_LEGACY:
     return true;
   default:
     return false;
@@ -2892,6 +2894,24 @@
   return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
 }
 
+/// Map a floating-point min/max opcode to its opposite (max <-> min), keeping
+/// the same flavor: fminnum/fmaxnum or the AMDGPU legacy min/max nodes.
+/// Any other opcode is a caller bug and hits llvm_unreachable.
+static unsigned inverseMinMax(unsigned Opc) {
+  switch (Opc) {
+  case ISD::FMAXNUM:
+    return ISD::FMINNUM;
+  case ISD::FMINNUM:
+    return ISD::FMAXNUM;
+  case AMDGPUISD::FMAX_LEGACY:
+    return AMDGPUISD::FMIN_LEGACY;
+  case AMDGPUISD::FMIN_LEGACY:
+    return AMDGPUISD::FMAX_LEGACY;
+  default:
+    llvm_unreachable("invalid min/max opcode");
+  }
+}
+
 SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -2987,7 +3007,11 @@
     return Res;
   }
   case ISD::FMAXNUM:
-  case ISD::FMINNUM: {
-    // fneg (fmaxnum x, y) -> fmaxnum (fneg x), (fneg y)
-    // fneg (fminnum x, y) -> fminnum (fneg x), (fneg y)
+  case ISD::FMINNUM:
+  case AMDGPUISD::FMAX_LEGACY:
+  case AMDGPUISD::FMIN_LEGACY: {
+    // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
+    // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
+    // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
+    // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
     SDValue LHS = N0.getOperand(0);
@@ -2995,7 +3019,7 @@
 
     SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
     SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
-    unsigned Opposite = (Opc == ISD::FMAXNUM) ? ISD::FMINNUM : ISD::FMAXNUM;
+    unsigned Opposite = inverseMinMax(Opc);
 
     SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
     if (!N0.hasOneUse())
Index: test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN %s
+; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-NONAN -check-prefix=GCN %s
+
+; FIXME: Should replace unsafe-fp-math with no signed zeros.
+
+; GCN-LABEL: {{^}}min_fneg_select_regression_0:
+; GCN-SAFE: v_max_legacy_f32_e64 [[MIN:v[0-9]+]], -1.0, -v0
+; GCN-NONAN: v_max_f32_e64 v{{[0-9]+}}, -v0, -1.0
+define amdgpu_ps float @min_fneg_select_regression_0(float %a, float %b) #0 {
+  %fneg.a = fsub float -0.0, %a
+  %cmp.a = fcmp ult float %a, 1.0
+  %min.a = select i1 %cmp.a, float %fneg.a, float -1.0
+  ret float %min.a
+}
+
+; GCN-LABEL: {{^}}min_fneg_select_regression_posk_0:
+; GCN-SAFE: v_max_legacy_f32_e64 [[MIN:v[0-9]+]], 1.0, -v0
+; GCN-NONAN: v_max_f32_e64 v{{[0-9]+}}, -v0, 1.0
+define amdgpu_ps float @min_fneg_select_regression_posk_0(float %a, float %b) #0 {
+  %fneg.a = fsub float -0.0, %a
+  %cmp.a = fcmp ult float %a, -1.0
+  %min.a = select i1 %cmp.a, float %fneg.a, float 1.0
+  ret float %min.a
+}
+
+; GCN-LABEL: {{^}}max_fneg_select_regression_0:
+; GCN-SAFE: v_min_legacy_f32_e64 [[MIN:v[0-9]+]], -1.0, -v0
+; GCN-NONAN: v_min_f32_e64 [[MIN:v[0-9]+]], -v0, -1.0
+define amdgpu_ps float @max_fneg_select_regression_0(float %a, float %b) #0 {
+  %fneg.a = fsub float -0.0, %a
+  %cmp.a = fcmp ugt float %a, 1.0
+  %min.a = select i1 %cmp.a, float %fneg.a, float -1.0
+  ret float %min.a
+}
+
+; GCN-LABEL: {{^}}max_fneg_select_regression_posk_0:
+; GCN-SAFE: v_min_legacy_f32_e64 [[MIN:v[0-9]+]], 1.0, -v0
+; GCN-NONAN: v_min_f32_e64 [[MIN:v[0-9]+]], -v0, 1.0
+define amdgpu_ps float @max_fneg_select_regression_posk_0(float %a, float %b) #0 {
+  %fneg.a = fsub float -0.0, %a
+  %cmp.a = fcmp ugt float %a, -1.0
+  %min.a = select i1 %cmp.a, float %fneg.a, float 1.0
+  ret float %min.a
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }