Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2793,8 +2793,10 @@ SDValue RHS = N.getOperand(2); EVT VT = N.getValueType(); - if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || - (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { + if (((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || + (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) && + (!LHS.hasOneUse() || !RHS.hasOneUse() || + allUsesHaveSourceMods(N.getNode()))) { return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS, RHS); } @@ -2807,7 +2809,8 @@ // TODO: Support vector constants. ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); - if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { + if (CRHS && (LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && + allUsesHaveSourceMods(N.getNode())) { SDLoc SL(N); // If one side is an fneg/fabs and the other is a constant, we can push the // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll @@ -7,10 +7,9 @@ ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_rcp_legacy_f32_e64 [[RCP:v[0-9]+]], -[[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 { %x = load volatile float, float addrspace(1)* undef %y = load volatile float, float addrspace(1)* undef @@ -25,10 +24,9 @@ ; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 { %x = load volatile float, float addrspace(1)* undef %cmp = icmp eq i32 %c, 0 Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -810,14 +810,12 @@ ret void } -; FIXME: This one should fold to rcp ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], 
[[X]] -; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc -; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]] -; GCN-NEXT: buffer_store_dword [[NEG_SELECT]] +; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]] +; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc +; GCN-NEXT: buffer_store_dword [[SELECT]] define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef %y = load volatile float, float addrspace(1)* undef