Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7652,6 +7652,23 @@ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); } + + // Canonicalize chains of adds to LHS to simplify the following transform. + if (N0.getOpcode() != ISD::FADD && N1.getOpcode() == ISD::FADD) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + + // Convert a chain of 3 dependent operations into 2 independent operations + // and 1 dependent operation, where N00 = (fadd z, w), N01 = y, N1 = x: + // (fadd (fadd (fadd z, w), y), x) -> (fadd (fadd z, w), (fadd x, y)) + if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && + N1.getOpcode() != ISD::FADD) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (N00.getOpcode() == ISD::FADD) { + SDValue NewAdd = DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N01); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N00, NewAdd); + } + } } // enable-unsafe-fp-math // FADD -> FMA combines: Index: test/CodeGen/X86/fp-fast.ll =================================================================== --- test/CodeGen/X86/fp-fast.ll +++ test/CodeGen/X86/fp-fast.ll @@ -113,3 +113,46 @@ %t2 = fadd float %a, %t1 ret float %t2 } + +; Verify that the first two adds are independent; the destination registers +; are used as source registers for the third add. 
+ +define float @reassociate_adds1(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: reassociate_adds1: +; CHECK: # BB#0: +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM1:%xmm[0-9]]] +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM2:%xmm[0-9]]] +; CHECK-NEXT: vaddss [[XMM2]], [[XMM1]], +; CHECK-NEXT: retq + %add0 = fadd float %a, %b + %add1 = fadd float %add0, %c + %add2 = fadd float %add1, %d ; previous add is the LHS operand at every step + ret float %add2 +} + +define float @reassociate_adds2(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: reassociate_adds2: +; CHECK: # BB#0: +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM1:%xmm[0-9]]] +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM2:%xmm[0-9]]] +; CHECK-NEXT: vaddss [[XMM2]], [[XMM1]], +; CHECK-NEXT: retq + %add0 = fadd float %a, %b + %add1 = fadd float %c, %add0 ; previous add is the RHS operand of the middle add + %add2 = fadd float %add1, %d + ret float %add2 +} + +define float @reassociate_adds3(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: reassociate_adds3: +; CHECK: # BB#0: +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM1:%xmm[0-9]]] +; CHECK-NEXT: vaddss {{%xmm[0-9], %xmm[0-9]}}, [[XMM2:%xmm[0-9]]] +; CHECK-NEXT: vaddss [[XMM2]], [[XMM1]], +; CHECK-NEXT: retq + %add0 = fadd float %a, %b + %add1 = fadd float %add0, %c + %add2 = fadd float %d, %add1 ; previous add is the RHS operand of the final add + ret float %add2 +} +