Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7987,8 +7987,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - ConstantFPSDNode *N1CFP = dyn_cast(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -8026,12 +8026,13 @@ bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags), @@ -8050,12 +8051,12 @@ // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(1.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } @@ -8064,19 +8065,19 @@ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(2.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(1.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } @@ -8085,16 +8086,16 @@ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(2.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, N1, DAG.getConstantFP(3.0, DL, VT), Flags); @@ -8102,7 +8103,7 @@ } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { Index: llvm/trunk/test/CodeGen/X86/fadd-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fadd-combines.ll +++ llvm/trunk/test/CodeGen/X86/fadd-combines.ll @@ -11,13 +11,12 @@ define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_zero_4f32: ; CHECK: # BB#0: -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: addps %xmm1, %xmm0 ; CHECK-NEXT: retq %y = fadd <4 x float> %x, zeroinitializer ret <4 x float> %y } +; CHECK: float 3 define float @fadd_2const_f32(float %x) #0 { ; CHECK-LABEL: fadd_2const_f32: ; CHECK: # BB#0: @@ -28,17 +27,21 @@ ret float %z } +; CHECK: float 5.000000e+00 +; CHECK: float 5.000000e+00 +; CHECK: float 5.000000e+00 +; CHECK: float 5.000000e+00 define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_2const_4f32: ; CHECK: # BB#0: ; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %y = fadd <4 x float> %x, %z = fadd <4 x float> %y, ret <4 x float> %z } +; CHECK: float 3 define float @fadd_x_fmul_x_c_f32(float %x) #0 { ; CHECK-LABEL: fadd_x_fmul_x_c_f32: ; CHECK: # BB#0: @@ -49,18 +52,21 @@ ret float %z } +; CHECK: float 2.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 5.000000e+00 define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_x_fmul_x_c_4f32: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00] -; CHECK-NEXT: mulps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %y = fmul <4 x float> %x, %z = fadd <4 x float> %x, %y ret <4 x float> %z } +; CHECK: float 3 define float @fadd_fmul_x_c_x_f32(float %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_x_f32: ; CHECK: # BB#0: @@ -71,18 +77,21 @@ ret float %z } +; CHECK: float 2.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 5.000000e+00 define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_x_4f32: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00] -; CHECK-NEXT: mulps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %y = fmul <4 x float> %x, %z = fadd <4 x float> %y, %x ret <4 x float> %z } +; CHECK: float 4 define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32: ; CHECK: # BB#0: @@ -94,13 +103,14 @@ ret float %w } +; CHECK: float 3.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 5.000000e+00 +; CHECK: float 6.000000e+00 define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00] -; CHECK-NEXT: mulps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %y = fadd <4 x float> %x, %x %z = fmul <4 x float> %x, @@ -108,6 +118,7 @@ ret <4 x float> %w } +; CHECK: float 4 define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32: ; CHECK: # BB#0: @@ -119,13 +130,14 @@ ret float %w } +; CHECK: float 3.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 5.000000e+00 +; CHECK: float 6.000000e+00 define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00] -; CHECK-NEXT: mulps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm0, %xmm1 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %y = fadd <4 x float> %x, %x %z = fmul <4 x float> %x, @@ -133,6 +145,7 @@ ret <4 x float> %w } +; CHECK: float 3 define float @fadd_x_fadd_x_x_f32(float %x) #0 { ; CHECK-LABEL: fadd_x_fadd_x_x_f32: ; CHECK: # BB#0: @@ -143,6 +156,10 @@ ret float %z } +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_x_fadd_x_x_4f32: ; CHECK: # BB#0: @@ -153,6 +170,7 @@ ret <4 x float> %z } +; CHECK: float 3 define float @fadd_fadd_x_x_x_f32(float %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_x_f32: ; CHECK: # BB#0: @@ -163,6 +181,10 @@ ret float %z } +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 +; CHECK: float 3.000000e+00 define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_x_4f32: ; CHECK: # BB#0: @@ -173,6 +195,7 @@ ret <4 x float> %z } +; CHECK: float 4 define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32: ; CHECK: # BB#0: @@ -183,6 +206,10 @@ ret float %z } +; CHECK: float 4.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 4.000000e+00 +; CHECK: float 4.000000e+00 define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32: ; CHECK: # BB#0: Index: llvm/trunk/test/CodeGen/X86/fmul-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fmul-combines.ll +++ llvm/trunk/test/CodeGen/X86/fmul-combines.ll @@ -86,15 +86,13 @@ } ; More than one use of a constant multiply should not inhibit the optimization. -; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. -; CHECK: float 5.000000e+00 -; CHECK: float 1.200000e+01 -; CHECK: float 2.100000e+01 -; CHECK: float 3.200000e+01 +; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. +; CHECK: float 6.000000e+00 +; CHECK: float 1.400000e+01 +; CHECK: float 2.400000e+01 +; CHECK: float 3.600000e+01 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use: ; CHECK: mulps -; CHECK: mulps -; CHECK: addps ; CHECK: ret define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 { %y = fmul <4 x float> %x,