Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2414,8 +2414,6 @@
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2426,13 +2424,12 @@
                      DAG.getUNDEF(CarryVT));
 
   // canonicalize constant to RHS.
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && !N1C)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
 
   // fold (uaddo x, 0) -> x + no carry out
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // If it cannot overflow, transform into an add.
@@ -2898,8 +2895,6 @@
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2915,11 +2910,11 @@
                      DAG.getConstant(0, DL, CarryVT));
 
   // fold (usubo x, 0) -> x + no borrow
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
-  if (isAllOnesConstant(N0))
+  if (isAllOnesOrAllOnesSplat(N0))
     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                      DAG.getConstant(0, DL, CarryVT));
 
Index: test/CodeGen/X86/combine-addo.ll
===================================================================
--- test/CodeGen/X86/combine-addo.ll
+++ test/CodeGen/X86/combine-addo.ll
@@ -84,22 +84,10 @@
 define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pmaxud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE-NEXT:    pxor %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -134,12 +122,12 @@
 define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_not:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE-NEXT:    pxor %xmm3, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psubd %xmm3, %xmm2
-; SSE-NEXT:    pmaxud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    psubd %xmm0, %xmm2
+; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
+; SSE-NEXT:    pmaxud %xmm2, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE-NEXT:    pxor %xmm3, %xmm0
 ; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm2
 ; SSE-NEXT:    movaps %xmm2, %xmm0
@@ -147,13 +135,14 @@
 ;
 ; AVX-LABEL: combine_vec_uadd_not:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpmaxud %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm3, %xmm0
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT:    vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX-NEXT:    vpxor %xmm3, %xmm2, %xmm2
+; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
   %2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
Index: test/CodeGen/X86/combine-subo.ll
===================================================================
--- test/CodeGen/X86/combine-subo.ll
+++ test/CodeGen/X86/combine-subo.ll
@@ -87,22 +87,10 @@
 define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE-NEXT:    pxor %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -171,24 +159,12 @@
 define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_self:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psubd %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE-NEXT:    pxor %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_self:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -220,25 +196,14 @@
 define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_negone:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE-NEXT:    pxor %xmm3, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
-; SSE-NEXT:    pminud %xmm3, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_negone:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpminud %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm3
-; AVX-NEXT:    vpxor %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0