Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -824,6 +824,45 @@
   return nullptr;
 }
 
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+    return !(Const->isOpaque() && NoOpaques);
+  if (N.getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  unsigned BitWidth = N.getScalarValueSizeInBits();
+  for (const SDValue &Op : N->op_values()) {
+    if (Op.isUndef())
+      continue;
+    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+        (Const->isOpaque() && NoOpaques))
+      return false;
+  }
+  return true;
+}
+
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+    return Splat->isNullValue();
+  return false;
+}
+
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+  unsigned BitWidth = N.getScalarValueSizeInBits();
+  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+  return false;
+}
+
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                     SDValue N1) {
   EVT VT = N0.getValueType();
@@ -1674,13 +1713,12 @@
   if (isNullConstant(N1))
     return N0;
   // fold ((c1-A)+c2) -> (c1+c2)-A
-  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
     if (N0.getOpcode() == ISD::SUB)
-      if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+      if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
         SDLoc DL(N);
         return DAG.getNode(ISD::SUB, DL, VT,
-                           DAG.getConstant(N1C->getAPIntValue()+
-                                           N0C->getAPIntValue(), DL, VT),
+                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                            N0.getOperand(1));
       }
   }
@@ -1688,10 +1726,12 @@
   if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
     return RADD;
   // fold ((0-A) + B) -> B-A
-  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
+  if (N0.getOpcode() == ISD::SUB &&
+      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
   // fold (A + (0-B)) -> A-B
-  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
+  if (N1.getOpcode() == ISD::SUB &&
+      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
   // fold (A+(B-A)) -> B
   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
@@ -1723,29 +1763,30 @@
     SDValue N10 = N1.getOperand(0);
     SDValue N11 = N1.getOperand(1);
 
-    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+    if (isConstantOrConstantVector(N00) ||
+        isConstantOrConstantVector(N10))
       return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   }
 
-  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   // fold (a+b) -> (a|b) iff a and b share no bits.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
-      VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+      VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
 
   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
-      isNullConstant(N1.getOperand(0).getOperand(0)))
+      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                    N1.getOperand(0).getOperand(1),
                                    N1.getOperand(1)));
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
-      isNullConstant(N0.getOperand(0).getOperand(0)))
+      isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                    N0.getOperand(0).getOperand(1),
@@ -1758,7 +1799,8 @@
 
       // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
       // and similar xforms where the inner op is either ~0 or 0.
-      if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
+      if (NumSignBits == DestBits &&
+          isOneConstantOrOneSplatConstant(N1->getOperand(1))) {
         SDLoc DL(N);
         return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
       }
@@ -13106,7 +13148,7 @@
   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
   // t11: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t3, t4
   // t12: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t5, t6
-  // t13: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t7, t8
+  // t13: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t7, t8
   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
   // t21: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t12, t13
   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
Index: test/CodeGen/X86/combine-add.ll
===================================================================
--- test/CodeGen/X86/combine-add.ll
+++ test/CodeGen/X86/combine-add.ll
@@ -19,18 +19,15 @@
 define <4 x i32> @combine_vec_add_constant_sub(<4 x i32> %a) {
 ; SSE-LABEL: combine_vec_add_constant_sub:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,2,3]
-; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,2,4,6]
 ; SSE-NEXT:    psubd %xmm0, %xmm1
-; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_constant_sub:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2,4,6]
 ; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a
   %2 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %1
@@ -41,17 +38,13 @@
 define <4 x i32> @combine_vec_add_neg0(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_vec_add_neg0:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    psubd %xmm0, %xmm2
-; SSE-NEXT:    paddd %xmm1, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    psubd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_neg0:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> zeroinitializer, %a
   %2 = add <4 x i32> %1, %b
@@ -62,16 +55,12 @@
 define <4 x i32> @combine_vec_add_neg1(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_vec_add_neg1:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    psubd %xmm1, %xmm2
-; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_neg1:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> zeroinitializer, %b
   %2 = add <4 x i32> %a, %1
@@ -186,18 +175,16 @@
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # BB#0:
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,2,3]
-; SSE-NEXT:    psubd %xmm2, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # BB#0:
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
-; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
@@ -209,18 +196,18 @@
 define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_vec_add_uniquebits:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE-NEXT:    orps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_uniquebits:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
   %2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
@@ -232,18 +219,14 @@
 define <4 x i32> @combine_vec_add_shl_neg0(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: combine_vec_add_shl_neg0:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    psubd %xmm1, %xmm2
-; SSE-NEXT:    pslld $5, %xmm2
-; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    pslld $5, %xmm1
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_shl_neg0:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> zeroinitializer, %y
   %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -255,18 +238,14 @@
 define <4 x i32> @combine_vec_add_shl_neg1(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: combine_vec_add_shl_neg1:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    psubd %xmm1, %xmm2
-; SSE-NEXT:    pslld $5, %xmm2
-; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    pslld $5, %xmm1
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_shl_neg1:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> zeroinitializer, %y
   %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -280,15 +259,13 @@
 ; SSE-LABEL: combine_vec_add_and_compare:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
-; SSE-NEXT:    psrld $31, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_and_compare:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpsrld $31, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = icmp eq <4 x i32> %a1, %a2
   %2 = sext <4 x i1> %1 to <4 x i32>
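
For context, the reassociation in the first visitADD hunk, ((c1-A)+c2) -> (c1+c2)-A, is what collapses combine_vec_add_constant_sub above into a single psubd: the rewritten DAG adds the two constant build vectors, and that ADD constant-folds immediately. Below is a minimal IR sketch of the before/after; it is not part of the patch, and the function name is hypothetical.

; Before the combine: two vector ops (sub, then add) with constant operands.
define <4 x i32> @reassoc_sketch(<4 x i32> %a) {
  %s = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a  ; (c1 - A)
  %r = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %s  ; ((c1 - A) + c2)
  ret <4 x i32> %r
}
; After the combine the DAG is equivalent to (c1 + c2) - A, i.e.
;   sub <4 x i32> <i32 0, i32 2, i32 4, i32 6>, %a
; which matches the single [0,2,4,6] psubd in the CHECK lines above.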