Skip to content

Commit

Permalink
[DAGCombiner] Generalize FADD constant combines to work with vectors
Browse files Browse the repository at this point in the history
Updated the FADD combines to work with vectors as well as scalars.

Differential Revision: http://reviews.llvm.org/D13416

llvm-svn: 249251
RKSimon committed Oct 3, 2015
1 parent 004ea24 commit dde6337
Showing 3 changed files with 66 additions and 40 deletions.
33 changes: 17 additions & 16 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
@@ -7987,8 +7987,8 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -8026,12 +8026,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);

// fold (fadd A, 0) -> A
if (N1CFP && N1CFP->isZero())
return N0;
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
return N0;

// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
Flags),
@@ -8050,12 +8051,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
@@ -8064,19 +8065,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}

if (N1.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
@@ -8085,24 +8086,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}

if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}

if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
61 changes: 44 additions & 17 deletions llvm/test/CodeGen/X86/fadd-combines.ll
Original file line number Diff line number Diff line change
@@ -11,13 +11,12 @@ define float @fadd_zero_f32(float %x) #0 {
; Test: adds zeroinitializer to the <4 x float> argument and returns the sum.
; NOTE(review): this listing is a diff view with both sides interleaved. Going
; by the hunk's line counts, the xorps/addps expectations below appear to be
; the removed side, leaving only retq -- confirm against the checked-in test.
define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_zero_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, zeroinitializer
ret <4 x float> %y
}

; CHECK: float 3
define float @fadd_2const_f32(float %x) #0 {
; CHECK-LABEL: fadd_2const_f32:
; CHECK: # BB#0:
@@ -28,17 +27,21 @@ define float @fadd_2const_f32(float %x) #0 {
ret float %z
}

; CHECK: float 5.000000e+00
; CHECK: float 5.000000e+00
; CHECK: float 5.000000e+00
; CHECK: float 5.000000e+00
; Test: (x + <1,2,3,4>) + <4,3,2,1>. The two constant vectors sum to 5.0 in
; every lane, which is what the four constant-pool expectations above look for.
; NOTE(review): this listing is a diff view; the two identical addps lines
; below are presumably one removed and one retained expectation (two dependent
; adds before, a single add after) -- confirm against the checked-in test.
define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_2const_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
ret <4 x float> %z
}

; CHECK: float 3
define float @fadd_x_fmul_x_c_f32(float %x) #0 {
; CHECK-LABEL: fadd_x_fmul_x_c_f32:
; CHECK: # BB#0:
@@ -49,18 +52,21 @@ define float @fadd_x_fmul_x_c_f32(float %x) #0 {
ret float %z
}

; CHECK: float 2.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 5.000000e+00
; Test: x + (x * <1,2,3,4>), with the multiply as the second fadd operand.
; The constant-pool expectations above (2, 3, 4, 5) are each lane of the
; multiplier plus one.
; NOTE(review): this listing is a diff view with both sides interleaved. Going
; by the hunk's line counts, the movaps/mulps/addps sequence appears to be the
; removed side and the single rip-relative mulps the added side -- confirm
; against the checked-in test.
define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_x_fmul_x_c_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; CHECK-NEXT: mulps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %x, %y
ret <4 x float> %z
}

; CHECK: float 3
define float @fadd_fmul_x_c_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_x_f32:
; CHECK: # BB#0:
@@ -71,18 +77,21 @@ define float @fadd_fmul_x_c_x_f32(float %x) #0 {
ret float %z
}

; CHECK: float 2.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 5.000000e+00
; Test: (x * <1,2,3,4>) + x, with the multiply as the first fadd operand.
; The constant-pool expectations above (2, 3, 4, 5) are each lane of the
; multiplier plus one.
; NOTE(review): this listing is a diff view with both sides interleaved. Going
; by the hunk's line counts, the movaps/mulps/addps sequence appears to be the
; removed side and the single rip-relative mulps the added side -- confirm
; against the checked-in test.
define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_x_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; CHECK-NEXT: mulps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %y, %x
ret <4 x float> %z
}

; CHECK: float 4
define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32:
; CHECK: # BB#0:
@@ -94,20 +103,22 @@ define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
ret float %w
}

; CHECK: float 3.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 5.000000e+00
; CHECK: float 6.000000e+00
; Test: (x + x) + (x * <1,2,3,4>), with the self-add as the first fadd operand.
; The constant-pool expectations above (3, 4, 5, 6) are each lane of the
; multiplier plus two.
; NOTE(review): this listing is a diff view with both sides interleaved. Going
; by the hunk's line counts, the movaps/mulps/addps/addps sequence appears to
; be the removed side and the single rip-relative mulps the added side --
; confirm against the checked-in test.
define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; CHECK-NEXT: mulps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, %x
%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%w = fadd <4 x float> %y, %z
ret <4 x float> %w
}

; CHECK: float 4
define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -119,20 +130,22 @@ define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
ret float %w
}

; CHECK: float 3.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 5.000000e+00
; CHECK: float 6.000000e+00
; Test: (x * <1,2,3,4>) + (x + x), with the multiply as the first fadd operand.
; The constant-pool expectations above (3, 4, 5, 6) are each lane of the
; multiplier plus two.
; NOTE(review): this listing is a diff view with both sides interleaved. Going
; by the hunk's line counts, the movaps/mulps/addps/addps sequence appears to
; be the removed side and the single rip-relative mulps the added side --
; confirm against the checked-in test.
define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; CHECK-NEXT: mulps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm0, %xmm1
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, %x
%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%w = fadd <4 x float> %z, %y
ret <4 x float> %w
}

; CHECK: float 3
define float @fadd_x_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_x_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -143,6 +156,10 @@ define float @fadd_x_fadd_x_x_f32(float %x) #0 {
ret float %z
}

; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_x_fadd_x_x_4f32:
; CHECK: # BB#0:
@@ -153,6 +170,7 @@ define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
ret <4 x float> %z
}

; CHECK: float 3
define float @fadd_fadd_x_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_x_f32:
; CHECK: # BB#0:
@@ -163,6 +181,10 @@ define float @fadd_fadd_x_x_x_f32(float %x) #0 {
ret float %z
}

; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
; CHECK: float 3.000000e+00
define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_x_4f32:
; CHECK: # BB#0:
@@ -173,6 +195,7 @@ define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
ret <4 x float> %z
}

; CHECK: float 4
define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -183,6 +206,10 @@ define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
ret float %z
}

; CHECK: float 4.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 4.000000e+00
; CHECK: float 4.000000e+00
define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32:
; CHECK: # BB#0:
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/X86/fmul-combines.ll
Original file line number Diff line number Diff line change
@@ -86,15 +86,13 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x)
}

; More than one use of a constant multiply should not inhibit the optimization.
; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
; CHECK: float 5.000000e+00
; CHECK: float 1.200000e+01
; CHECK: float 2.100000e+01
; CHECK: float 3.200000e+01
; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
; CHECK: float 6.000000e+00
; CHECK: float 1.400000e+01
; CHECK: float 2.400000e+01
; CHECK: float 3.600000e+01
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
; CHECK: mulps
; CHECK: mulps
; CHECK: addps
; CHECK: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>

0 comments on commit dde6337

Please sign in to comment.