Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7801,6 +7801,24 @@
                            N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
       }
     }
+
+    // Commute so that a lone FADD operand is always on the left; the
+    // reassociation below then only has to match one operand order.
+    if (N1.getOpcode() == ISD::FADD && N0.getOpcode() != ISD::FADD)
+      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+
+    // Shorten a serial chain of three adds so that two of them no longer
+    // depend on each other:
+    //   (fadd (fadd (fadd z, w), y), x) -> (fadd (fadd z, w), (fadd x, y))
+    // Only done when the middle add has a single use.
+    if (N0.getOpcode() == ISD::FADD && N1.getOpcode() != ISD::FADD &&
+        N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::FADD) {
+      SDLoc AddLoc(N);
+      SDValue InnerSum = N0.getOperand(0); // (fadd z, w), reused as-is
+      SDValue Tail = N0.getOperand(1);     // y
+      SDValue SideSum = DAG.getNode(ISD::FADD, AddLoc, VT, N1, Tail);
+      return DAG.getNode(ISD::FADD, AddLoc, VT, InnerSum, SideSum);
+    }
   } // enable-unsafe-fp-math
 
   // FADD -> FMA combines:
Index: test/CodeGen/X86/fp-fast.ll
===================================================================
--- test/CodeGen/X86/fp-fast.ll
+++ test/CodeGen/X86/fp-fast.ll
@@ -113,3 +113,46 @@
   %t2 = fadd float %a, %t1
   ret float %t2
 }
+
+; The first two adds must not depend on each other; the third add takes both
+; of their destination registers as its sources.
+
+define float @reassociate_adds1(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %add0, %c   ; running sum is the left operand
+  %add2 = fadd float %add1, %d   ; running sum is the left operand
+  ret float %add2
+}
+
+define float @reassociate_adds2(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %c, %add0   ; running sum is the right operand here
+  %add2 = fadd float %add1, %d
+  ret float %add2
+}
+
+define float @reassociate_adds3(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %add0, %c
+  %add2 = fadd float %d, %add1   ; running sum is the right operand here
+  ret float %add2
+}
+