Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -229,6 +229,10 @@ SmallVectorImpl &Results, SelectionDAG &DAG) const override; + SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + SDValue CC, DAGCombinerInfo &DCI) const; + SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1391,15 +1391,16 @@ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); } -/// Generate Min/Max node -SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, - SDValue LHS, SDValue RHS, - SDValue True, SDValue False, - SDValue CC, - DAGCombinerInfo &DCI) const { - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); +// TODO: Handle fabs too +static SDValue peekFNeg(SDValue Val) { + if (Val.getOpcode() == ISD::FNEG) + return Val.getOperand(0); + return Val; +} +SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl( + const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, + SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = cast(CC)->get(); switch (CCOpcode) { @@ -1465,6 +1466,39 @@ return SDValue(); } +/// Generate Min/Max node +SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, + SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + SDValue CC, + DAGCombinerInfo &DCI) const { + if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) + return 
combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI); + + SelectionDAG &DAG = DCI.DAG; + + // If we can't directly match this, try to see if we can fold an fneg to + // match. + + ConstantFPSDNode *CRHS = dyn_cast(RHS); + ConstantFPSDNode *CFalse = dyn_cast(False); + SDValue NegTrue = peekFNeg(True); + + // Undo the combine foldFreeOpFromSelect does if it helps us match the min/max + if (LHS == NegTrue && CFalse && CRHS) { + APFloat NegRHS = neg(CRHS->getValueAPF()); + if (NegRHS == CFalse->getValueAPF()) { + SDValue Combined = + combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI); + if (Combined) + return DAG.getNode(ISD::FNEG, DL, VT, Combined); + return SDValue(); + } + } + + return SDValue(); +} + std::pair AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); Index: llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -91,6 +91,8 @@ Info.True = MI.getOperand(2).getReg(); Info.False = MI.getOperand(3).getReg(); + // TODO: Handle case where the selected value is an fneg and the compared + // constant is the negation of the selected value. 
if (!(Info.LHS == Info.True && Info.RHS == Info.False) && !(Info.LHS == Info.False && Info.RHS == Info.True)) return false; Index: llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll @@ -628,13 +628,12 @@ ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16: ; SI-SAFE: ; %bb.0: ; %.entry ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0 -; SI-SAFE-NEXT: v_bfrev_b32_e32 v1, 1 -; SI-SAFE-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; SI-SAFE-NEXT: s_brev_b32 s0, 1 +; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0 +; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 +; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-SAFE-NEXT: ; return to shader part epilog ; ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16: Index: llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll @@ -291,12 +291,10 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 { ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32: ; SI-SAFE: ; %bb.0: ; %.entry -; SI-SAFE-NEXT: v_bfrev_b32_e32 v0, 1 -; SI-SAFE-NEXT: v_mov_b32_e32 v1, s0 -; SI-SAFE-NEXT: v_cmp_ngt_f32_e64 vcc, s0, 0 -; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0 +; SI-SAFE-NEXT: s_brev_b32 s0, 1 ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 +; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc ; SI-SAFE-NEXT: ; return to shader 
part epilog ;