diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3444,6 +3444,7 @@ /// negated form more cheaply than the expression itself. Else return 0. virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + bool AllowMultiConst, unsigned Depth = 0) const; /// If isNegatibleForFree returns true, return the newly negated expression. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12115,14 +12115,14 @@ // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize, false) == 2) return DAG.getNode( ISD::FSUB, DL, VT, N0, TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize, false) == 2) return DAG.getNode( ISD::FSUB, DL, VT, N1, TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); @@ -12304,7 +12304,7 @@ if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize, false)) return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); @@ 
-12323,7 +12323,7 @@ } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize, false)) return DAG.getNode( ISD::FADD, DL, VT, N0, TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); @@ -12341,9 +12341,9 @@ /// least one input is strictly cheaper in negated form. bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { if (char LHSNeg = - TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize, false)) if (char RHSNeg = - TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize, false)) // Both negated operands are at least as cheap as their counterparts. // Check to see if at least one is cheaper negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -13328,7 +13328,7 @@ if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize, false)) return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5413,7 +5413,7 @@ char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { + bool AllowMultiConst, unsigned Depth) const { // fneg is removable even if it has multiple uses. 
if (Op.getOpcode() == ISD::FNEG) return 2; @@ -5422,8 +5422,11 @@ EVT VT = Op.getValueType(); const SDNodeFlags Flags = Op->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && - isFPExtFree(VT, Op.getOperand(0).getValueType()))) + if (!Op.hasOneUse() && + !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType())) && + !(AllowMultiConst && Op.getOpcode() == ISD::ConstantFP) + ) return 0; // Don't recurse exponentially. @@ -5468,11 +5471,11 @@ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) + ForCodeSize, AllowMultiConst, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) @@ -5485,7 +5488,7 @@ case ISD::FDIV: // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) + ForCodeSize, AllowMultiConst, Depth + 1)) return V; // Ignore X * 2.0 because that is expected to be canonicalized to X + X. @@ -5494,7 +5497,7 @@ return 0; return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); case ISD::FMA: case ISD::FMAD: { @@ -5504,15 +5507,15 @@ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); if (!V2) return 0; // One of Op0/Op1 must be cheaply negatible, then select the cheapest. 
char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); char V01 = std::max(V0, V1); return V01 ? std::max(V01, V2) : 0; } @@ -5521,7 +5524,7 @@ case ISD::FP_ROUND: case ISD::FSIN: return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); } return 0; @@ -5565,7 +5568,7 @@ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) + true, Depth + 1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), getNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, @@ -5592,7 +5595,7 @@ case ISD::FDIV: // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) + true, Depth + 1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), getNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, @@ -5616,9 +5619,9 @@ - ForCodeSize, Depth + 1); + ForCodeSize, true, Depth + 1); char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, true, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, true, Depth + 1); if (V0 >= V1) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -806,7 +806,8 @@ /// for the same cost as the expression itself, or 2 if we can compute the /// negated form more cheaply than the expression itself. Else return 0. 
char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, - bool ForCodeSize, unsigned Depth) const override; + bool ForCodeSize, bool AllowMultiConst, + unsigned Depth) const override; /// If isNegatibleForFree returns true, return the newly negated expression. SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41709,6 +41709,7 @@ char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + bool AllowMultiConst, unsigned Depth) const { // fneg patterns are removable even if they have multiple uses. if (isFNEG(DAG, Op.getNode(), Depth)) @@ -41737,7 +41738,7 @@ // extra operand negations as well. for (int i = 0; i != 3; ++i) { char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, AllowMultiConst, Depth + 1); if (V == 2) return V; } @@ -41746,7 +41747,7 @@ } return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations, - ForCodeSize, Depth); + ForCodeSize, AllowMultiConst, Depth); } SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, @@ -41778,7 +41779,7 @@ SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue()); for (int i = 0; i != 3; ++i) { char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, - ForCodeSize, Depth + 1); + ForCodeSize, true, Depth + 1); if (V == 2) NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1); @@ -42628,7 +42629,7 @@ auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); - if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) { + if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize, false) 
== 2) { V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize); return true; } @@ -42637,7 +42638,7 @@ if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { SDValue Vec = V.getOperand(0); - if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) { + if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize, false) == 2) { SDValue NegVal = TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), @@ -42677,7 +42678,7 @@ bool LegalOperations = !DCI.isBeforeLegalizeOps(); SDValue N2 = N->getOperand(2); - if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2) + if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize, false) != 2) return SDValue(); SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize); diff --git a/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll b/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN %s + +; GCN: v_fma_f32 v0, s0, 0, 0.5 + +define amdgpu_ps void @main(float addrspace(6)* %arg1) #0 { +main_body: + %tmp2 = load float, float addrspace(6)* %arg1 + %tmp3 = call nsz float @llvm.floor.f32(float undef) #2 + %tmp4 = fptosi float %tmp3 to i32 + %tmp5 = add i32 0, %tmp4 + %tmp6 = sub i32 0, %tmp5 + %tmp7 = sitofp i32 %tmp4 to float + %tmp8 = fmul nsz float %tmp7, 0x3FEBB67AE0000000 + %tmp9 = sitofp i32 %tmp6 to float + %tmp10 = call nsz float @llvm.fmuladd.f32(float %tmp9, float 0xBFEBB67AE0000000, float %tmp8) #2 + %tmp11 = fsub nsz float 0.000000e+00, %tmp10 + %tmp12 = call nsz float @llvm.fmuladd.f32(float %tmp11, float 5.000000e-01, float 5.000000e-01) #2 + %tmp13 = call nsz float @llvm.fmuladd.f32(float 0.000000e+00, float %tmp2, float 5.000000e-01) #2 + %tmp14 = call nsz float 
@llvm.floor.f32(float %tmp13) #2 + %tmp15 = fptosi float %tmp14 to i32 + %tmp16 = icmp eq i32 %tmp15, 0 + br i1 %tmp16, label %endif06, label %if04 + +if04: ; preds = %main_body + %tmp17 = fadd nsz float %tmp12, -5.000000e-01 + %tmp18 = fneg nsz float %tmp17 + %tmp19 = fmul nsz float 0.000000e+00, %tmp18 + %tmp20 = call nsz float @llvm.fmuladd.f32(float 0.000000e+00, float 0.000000e+00, float %tmp19) #2 + %tmp21 = fadd nsz float %tmp20, 5.000000e-01 + %tmp22 = fadd nsz float %tmp21, 0.000000e+00 + %tmp23 = fmul nsz float %tmp22, 0.000000e+00 + %tmp24 = call nsz <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp23, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #4 + %tmp25 = extractelement <4 x float> %tmp24, i32 3 + %tmp26 = fsub nsz float 1.000000e+00, %tmp25 + %tmp27 = call nsz float @llvm.fmuladd.f32(float undef, float %tmp26, float undef) #2 + unreachable + +endif06: ; preds = %main_body + ret void +} + +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.floor.f32(float) #3 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fmuladd.f32(float, float, float) #3 + +attributes #0 = { "no-signed-zeros-fp-math"="true" } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readnone speculatable willreturn } +attributes #4 = { convergent nounwind readnone }