diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47606,6 +47606,34 @@
                           PMADDBuilder);
 }
 
+/// Try to fold a vector integer ADD/SUB whose operands form a horizontal
+/// pattern (as recognised by isHorizontalBinOp) into X86ISD::HADD/HSUB.
+/// Shared by the ADD and SUB combines; returns an empty SDValue when the
+/// value type, the subtarget (SSSE3 required) or the operands do not qualify.
+static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG,
+                                           const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode");
+
+  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
+       VT == MVT::v8i32) &&
+      Subtarget.hasSSSE3() &&
+      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd)) {
+    auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL,
+                              ArrayRef<SDValue> Ops) {
+      return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB,
+                         DL, Ops[0].getValueType(), Ops);
+    };
+    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
+                            HOpBuilder);
+  }
+
+  return SDValue();
+}
+
 static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
@@ -47619,17 +47647,8 @@
     return MAdd;
 
   // Try to synthesize horizontal adds from adds of shuffles.
-  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
-       VT == MVT::v8i32) &&
-      Subtarget.hasSSSE3() &&
-      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) {
-    auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
-                          ArrayRef<SDValue> Ops) {
-      return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
-    };
-    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
-                            HADDBuilder);
-  }
+  if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget))
+    return V;
 
   // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into
   // (sub Y, (sext (vXi1 X))).
@@ -47802,18 +47821,8 @@
   }
 
   // Try to synthesize horizontal subs from subs of shuffles.
-  EVT VT = N->getValueType(0);
-  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
-       VT == MVT::v8i32) &&
-      Subtarget.hasSSSE3() &&
-      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) {
-    auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
-                          ArrayRef<SDValue> Ops) {
-      return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
-    };
-    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
-                            HSUBBuilder);
-  }
+  if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget))
+    return V;
 
   // Try to create PSUBUS if SUB's argument is max/min
   if (SDValue V = combineSubToSubus(N, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/X86/pr46455.ll b/llvm/test/CodeGen/X86/pr46455.ll
--- a/llvm/test/CodeGen/X86/pr46455.ll
+++ b/llvm/test/CodeGen/X86/pr46455.ll
@@ -8,12 +8,13 @@
 ; CHECK-NEXT:    movq (%rdi), %rax
 ; CHECK-NEXT:    movq 24(%rdi), %rcx
 ; CHECK-NEXT:    vcmpneqps (%rax), %ymm0, %ymm0
-; CHECK-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm1
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3]
-; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
-; CHECK-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrld $31, %xmm0, %xmm1
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
+; CHECK-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovd %xmm0, (%rcx)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq