Patch summary (free text ahead of the first Index: header).

  * AMDGPUISelLowering.cpp: fnegFoldsIntoOp() is moved from just above
    performFNegCombine to the "Target Information" section so that the
    select combine, which appears earlier in the file, can call it.
  * The combine that pushes an fneg/fabs through a select with a constant
    operand is now skipped when the fneg/fabs source has a single use and
    either (a) the wrapper is an fneg and fnegFoldsIntoOp() accepts the
    source opcode, or (b) the wrapper is an fabs and the source is an FMUL.
    In those cases the combine falls through to the final return SDValue().
  * New lit tests cover fadd/fsub/fmul/fma/fmuladd/rcp sources, plus the
    rcp.legacy/fmul.legacy sources in a separate tahiti-only file.
  * The kernels in select-fabs-fneg-extract-legacy.ll use attribute group
    #0 (nounwind), which that file defines.

Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -487,6 +487,24 @@
 // Target Information
 //===----------------------------------------------------------------------===//
 
+static bool fnegFoldsIntoOp(unsigned Opc) {
+  switch (Opc) {
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FMA:
+  case ISD::FMAD:
+  case ISD::FSIN:
+  case AMDGPUISD::RCP:
+  case AMDGPUISD::RCP_LEGACY:
+  case AMDGPUISD::SIN_HW:
+  case AMDGPUISD::FMUL_LEGACY:
+    return true;
+  default:
+    return false;
+  }
+}
+
 MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
   return MVT::i32;
 }
@@ -2741,20 +2759,31 @@
     SDValue NewLHS = LHS.getOperand(0);
     SDValue NewRHS = RHS;
 
-    // TODO: Skip for operations where other combines can absord the fneg.
+    // Careful: if the neg can be folded up, don't try to pull it back down.
+    bool ShouldFoldNeg = true;
 
-    if (LHS.getOpcode() == ISD::FNEG)
-      NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
-    else if (CRHS->isNegative())
-      return SDValue();
+    if (NewLHS.hasOneUse()) {
+      unsigned Opc = NewLHS.getOpcode();
+      if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
+        ShouldFoldNeg = false;
+      if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
+        ShouldFoldNeg = false;
+    }
 
-    if (Inv)
-      std::swap(NewLHS, NewRHS);
+    if (ShouldFoldNeg) {
+      if (LHS.getOpcode() == ISD::FNEG)
+        NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+      else if (CRHS->isNegative())
+        return SDValue();
 
-    SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
-                                    Cond, NewLHS, NewRHS);
-    DCI.AddToWorklist(NewSelect.getNode());
-    return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+      if (Inv)
+        std::swap(NewLHS, NewRHS);
+
+      SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
+                                      Cond, NewLHS, NewRHS);
+      DCI.AddToWorklist(NewSelect.getNode());
+      return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+    }
   }
 
   return SDValue();
@@ -2809,24 +2838,6 @@
   return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
 }
 
-static bool fnegFoldsIntoOp(unsigned Opc) {
-  switch (Opc) {
-  case ISD::FADD:
-  case ISD::FSUB:
-  case ISD::FMUL:
-  case ISD::FMA:
-  case ISD::FMAD:
-  case ISD::FSIN:
-  case AMDGPUISD::RCP:
-  case AMDGPUISD::RCP_LEGACY:
-  case AMDGPUISD::SIN_HW:
-  case AMDGPUISD::FMUL_LEGACY:
-    return true;
-  default:
-    return false;
-  }
-}
-
 SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float %x)
+  %fneg = fsub float -0.0, %rcp
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
+  %fneg = fsub float -0.0, %mul
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
===================================================================
--- test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -721,7 +721,120 @@
   ret void
 }
 
+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %add = fadd float %x, 4.0
+  %fneg = fsub float -0.0, %add
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %add = fsub float %x, 4.0
+  %fneg = fsub float -0.0, %add
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %mul = fmul float %x, 4.0
+  %fneg = fsub float -0.0, %mul
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %z = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
+  %fneg = fsub float -0.0, %fma
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %z = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
+  %fneg = fsub float -0.0, %fmad
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; FIXME: This one should fold to rcp
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
+  %fneg = fsub float -0.0, %rcp
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
 declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.amdgcn.rcp.f32(float) #1
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }