Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2443,8 +2443,6 @@
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2455,13 +2453,12 @@
                      DAG.getUNDEF(CarryVT));
 
   // canonicalize constant to RHS.
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && !N1C)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
 
   // fold (uaddo x, 0) -> x + no carry out
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // If it cannot overflow, transform into an add.
@@ -2488,7 +2485,9 @@
 }
 
 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
-  auto VT = N0.getValueType();
+  EVT VT = N0.getValueType();
+  if (VT.isVector())
+    return SDValue();
 
   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
   // If Y + 1 cannot overflow.
@@ -2952,8 +2951,6 @@
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2969,11 +2966,11 @@
                      DAG.getConstant(0, DL, CarryVT));
 
   // fold (usubo x, 0) -> x + no borrow
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
-  if (isAllOnesConstant(N0))
+  if (isAllOnesOrAllOnesSplat(N0))
     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                      DAG.getConstant(0, DL, CarryVT));
 
Index: llvm/trunk/test/CodeGen/X86/combine-addo.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-addo.ll
+++ llvm/trunk/test/CodeGen/X86/combine-addo.ll
@@ -62,18 +62,10 @@
 define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pmaxud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -108,24 +100,23 @@
 define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_not:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm3
-; SSE-NEXT:    psubd %xmm2, %xmm3
-; SSE-NEXT:    pmaxud %xmm3, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    psubd %xmm0, %xmm2
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT:    pmaxud %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_not:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT:    vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
   %2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
Index: llvm/trunk/test/CodeGen/X86/combine-subo.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-subo.ll
+++ llvm/trunk/test/CodeGen/X86/combine-subo.ll
@@ -62,18 +62,10 @@
 define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -138,20 +130,12 @@
 define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_self:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psubd %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_self:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -183,22 +167,14 @@
 define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_negone:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE-NEXT:    pxor %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_negone:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpminud %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
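
For context, a minimal IR sketch of what the splat-aware combines above enable. This reproducer is hypothetical (the function name is made up and it is not part of the patch); it chains the two UADDO changes: the constant-splat LHS is first canonicalized to the RHS, and (uaddo x, 0) then folds to x with an all-false overflow mask, so the select collapses away:

declare { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @uaddo_vec_splat_zero(<4 x i32> %x, <4 x i32> %y) {
  ; (uaddo 0, %x) is canonicalized to (uaddo %x, 0), which folds to
  ; %x plus a zero carry vector, as in combine_vec_uadd_zero above.
  %r = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> zeroinitializer, <4 x i32> %x)
  %val = extractvalue { <4 x i32>, <4 x i1> } %r, 0
  %ovf = extractvalue { <4 x i32>, <4 x i1> } %r, 1
  ; The overflow mask is known all-false, so this select folds to %y.
  %sel = select <4 x i1> %ovf, <4 x i32> %val, <4 x i32> %y
  ret <4 x i32> %sel
}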