# Patch summary (descriptive header only; no hunk content below is changed).
#
# lib/Target/AMDGPU/AMDGPUISelLowering.cpp
#   On the f32 path where the condition has one use, the result of
#   CombineFMinMaxLegacy is now stored in a local (MinMax) and then returned,
#   instead of being returned directly. The DCI.AddToWorklist(MinMax.getNode())
#   call is added in commented-out form, so this code itself requests no
#   re-queue of the new node.
#   NOTE(review): the "Revisit this node" comment describes that disabled
#   call -- confirm whether the call should be enabled or the comment dropped.
#
# lib/Target/AMDGPU/SIISelLowering.cpp
#   * The min3/max3 folds (inner operand with the same opcode and a single
#     use, checked for Op0 and then for Op1) are now skipped when Opc is
#     AMDGPUISD::FMIN_LEGACY or AMDGPUISD::FMAX_LEGACY.
#   * performFPMed3ImmCombine is now also tried when Opc is FMIN_LEGACY and
#     Op0 is FMAX_LEGACY, in addition to FMINNUM over FMAXNUM. The f32 and
#     Op0.hasOneUse() requirements are unchanged.
#   * The opcode switch adds FMIN_LEGACY and FMAX_LEGACY to the existing
#     min/max case group and removes the "What about fmax_legacy?" TODO.
#
# test/CodeGen/AMDGPU/fmed3.ll
#   Adds v_test_legacy_fmed3_r_i_i_f32: two fcmp+select pairs (labelled
#   fmax_legacy and fmin_legacy inside the test) using constants 2.0 and 4.0.
#   NOSNAN expects one v_med3_f32; SNAN expects v_max_f32_e32 followed by
#   v_min_f32_e32.
#
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2705,8 +2705,13 @@ SDValue True = N->getOperand(1); SDValue False = N->getOperand(2); - if (VT == MVT::f32 && Cond.hasOneUse()) - return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); + if (VT == MVT::f32 && Cond.hasOneUse()) { + SDValue MinMax + = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); + // Revisit this node so we can catch min3/max3/med3 patterns. + //DCI.AddToWorklist(MinMax.getNode()); + return MinMax; + } // There's no reason to not do this if the condition has other uses. return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -2204,29 +2204,31 @@ // Only do this if the inner op has one use since this will just increases // register pressure for no benefit. - // max(max(a, b), c) -> max3(a, b, c) - // min(min(a, b), c) -> min3(a, b, c) - if (Op0.getOpcode() == Opc && Op0.hasOneUse()) { - SDLoc DL(N); - return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc), - DL, - N->getValueType(0), - Op0.getOperand(0), - Op0.getOperand(1), - Op1); - } - - // Try commuted. 
- // max(a, max(b, c)) -> max3(a, b, c) - // min(a, min(b, c)) -> min3(a, b, c) - if (Op1.getOpcode() == Opc && Op1.hasOneUse()) { - SDLoc DL(N); - return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc), - DL, - N->getValueType(0), - Op0, - Op1.getOperand(0), - Op1.getOperand(1)); + if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) { + // max(max(a, b), c) -> max3(a, b, c) + // min(min(a, b), c) -> min3(a, b, c) + if (Op0.getOpcode() == Opc && Op0.hasOneUse()) { + SDLoc DL(N); + return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc), + DL, + N->getValueType(0), + Op0.getOperand(0), + Op0.getOperand(1), + Op1); + } + + // Try commuted. + // max(a, max(b, c)) -> max3(a, b, c) + // min(a, min(b, c)) -> min3(a, b, c) + if (Op1.getOpcode() == Opc && Op1.hasOneUse()) { + SDLoc DL(N); + return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc), + DL, + N->getValueType(0), + Op0, + Op1.getOperand(0), + Op1.getOperand(1)); + } } // min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1) @@ -2241,7 +2243,9 @@ } // fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1) - if (Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM && + if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) || + (Opc == AMDGPUISD::FMIN_LEGACY && + Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) && N->getValueType(0) == MVT::f32 && Op0.hasOneUse()) { if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1)) return Res; @@ -2291,12 +2295,14 @@ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); case ISD::SETCC: return performSetCCCombine(N, DCI); - case ISD::FMAXNUM: // TODO: What about fmax_legacy? 
+ case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: - case ISD::UMIN: { + case ISD::UMIN: + case AMDGPUISD::FMIN_LEGACY: + case AMDGPUISD::FMAX_LEGACY: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) Index: test/CodeGen/AMDGPU/fmed3.ll =================================================================== --- test/CodeGen/AMDGPU/fmed3.ll +++ test/CodeGen/AMDGPU/fmed3.ll @@ -126,6 +126,29 @@ ret void } +; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32: +; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0 + +; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} +; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} +define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid + %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid + %a = load float, float addrspace(1)* %gep0 + + ; fmax_legacy + %cmp0 = fcmp ule float %a, 2.0 + %max = select i1 %cmp0, float 2.0, float %a + + ; fmin_legacy + %cmp1 = fcmp uge float %max, 4.0 + %med = select i1 %cmp1, float 4.0, float %max + + store float %med, float addrspace(1)* %outgep + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" } attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }