Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1036,10 +1036,19 @@ SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // select (setcc x, K), (fneg x), -K -> min/max x, K (FIXME: fneg/fabs lost?) + if ((True.getOpcode() == ISD::FNEG || True.getOpcode() == ISD::FABS) && + isa<ConstantFPSDNode>(False)) { + SDValue FoldFalse = DAG.getNode(True.getOpcode(), DL, VT, False); + return combineFMinMaxLegacy(DL, VT, LHS, RHS, + True.getOperand(0), FoldFalse, CC, DCI); + } + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); - SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); switch (CCOpcode) { case ISD::SETOEQ: @@ -2800,8 +2809,10 @@ SDValue RHS = N.getOperand(2); EVT VT = N.getValueType(); - if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || - (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { + if (((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || + (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) && + (!LHS.hasOneUse() || !RHS.hasOneUse() || + allUsesHaveSourceMods(N.getNode()))) { return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS, RHS); } @@ -2814,7 +2825,8 @@ // TODO: Support vector constants. ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); - if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { + if (CRHS && (LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && + allUsesHaveSourceMods(N.getNode())) { SDLoc SL(N); // If one side is an fneg/fabs and the other is a constant, we can push the // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 
@@ -2854,50 +2866,51 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const { - if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) - return Folded; - SDValue Cond = N->getOperand(0); - if (Cond.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - SDValue CC = Cond.getOperand(2); - - SDValue True = N->getOperand(1); - SDValue False = N->getOperand(2); - - if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. - SelectionDAG &DAG = DCI.DAG; - if ((DAG.isConstantValueOfAnyType(True) || - DAG.isConstantValueOfAnyType(True)) && - (!DAG.isConstantValueOfAnyType(False) && - !DAG.isConstantValueOfAnyType(False))) { - // Swap cmp + select pair to move constant to false input. - // This will allow using VOPC cndmasks more often. - // select (setcc x, y), k, x -> select (setcc y, x) x, x - - SDLoc SL(N); - ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), - LHS.getValueType().isInteger()); + if (Cond.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. + SelectionDAG &DAG = DCI.DAG; + if ((DAG.isConstantValueOfAnyType(True) || + DAG.isConstantValueOfAnyType(True)) && + (!DAG.isConstantValueOfAnyType(False) && + !DAG.isConstantValueOfAnyType(False))) { + // Swap cmp + select pair to move constant to false input. + // This will allow using VOPC cndmasks more often. 
+ // select (setcc x, y), k, x -> select (setccinv x, y), x, k + + SDLoc SL(N); + ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + LHS.getValueType().isInteger()); + + SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC); + return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True); + } - SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC); - return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True); + if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) { + SDValue MinMax + = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); + // Revisit this node so we can catch min3/max3/med3 patterns. + //DCI.AddToWorklist(MinMax.getNode()); + return MinMax; + } } - if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) { - SDValue MinMax - = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); - // Revisit this node so we can catch min3/max3/med3 patterns. - //DCI.AddToWorklist(MinMax.getNode()); - return MinMax; - } + // There's no reason to not do this if the condition has other uses. + return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); } - // There's no reason to not do this if the condition has other uses. 
- return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); + if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) + return Folded; + + return SDValue(); } static bool isConstantFPZero(SDValue N) { Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll @@ -7,10 +7,9 @@ ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_rcp_legacy_f32_e64 [[RCP:v[0-9]+]], -[[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 { %x = load volatile float, float addrspace(1)* undef %y = load volatile float, float addrspace(1)* undef @@ -25,10 +24,9 @@ ; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 { %x = load volatile float, float addrspace(1)* undef %cmp = icmp eq i32 %c, 0 Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ 
test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -810,14 +810,12 @@ ret void } -; FIXME: This one should fold to rcp ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef %y = load volatile float, float addrspace(1)* undef