diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2138,18 +2138,24 @@
   case ISD::SMIN: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
-    // If we're only wanting the signbit, then we can simplify to OR node.
-    // TODO: Extend this based on ComputeNumSignBits.
-    if (DemandedBits.isSignMask())
+    // If we're only wanting the signbits, then we can simplify to OR node.
+    unsigned NumSignBits =
+        std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
+                 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
+    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
+    if (NumSignBits >= NumDemandedUpperBits)
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
     break;
   }
   case ISD::SMAX: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
-    // If we're only wanting the signbit, then we can simplify to AND node.
-    // TODO: Extend this based on ComputeNumSignBits.
-    if (DemandedBits.isSignMask())
+    // If we're only wanting the signbits, then we can simplify to AND node.
+    unsigned NumSignBits =
+        std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
+                 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
+    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
+    if (NumSignBits >= NumDemandedUpperBits)
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, Op1));
     break;
   }
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -483,28 +483,24 @@
 define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-LABEL: signbits_mask_ashr_smax:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
-; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
-; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-AVX1-LABEL: signbits_mask_ashr_smax:
 ; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: signbits_mask_ashr_smax:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
-; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
-; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
@@ -521,28 +517,24 @@
 define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-LABEL: signbits_mask_ashr_smin:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
-; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
-; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-AVX1-LABEL: signbits_mask_ashr_smin:
 ; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: signbits_mask_ashr_smin:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
-; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
-; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT:    retq
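
Note (not part of the patch): below is a minimal standalone C++ sketch that brute-forces the bit-level fact the new SimplifyDemandedBits logic relies on. When both operands of smin/smax have at least as many sign bits as there are demanded upper bits, every demanded bit of each operand is a copy of its sign bit, so the demanded bits of smin(a,b) equal those of (a | b) and the demanded bits of smax(a,b) equal those of (a & b). The 8-bit width, the [-16, 15] value range (at least 4 sign bits) and the 0xF0 demanded mask are illustrative choices, not values taken from the patch.

// Sketch only: exhaustively checks the smin->OR / smax->AND equivalence on
// the demanded upper bits for i8 values with at least 4 sign bits.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t DemandedMask = 0xF0; // only the top 4 bits are demanded
  for (int A = -16; A <= 15; ++A) {  // [-16, 15]: >= 4 sign bits in i8
    for (int B = -16; B <= 15; ++B) {
      uint8_t UA = static_cast<uint8_t>(A), UB = static_cast<uint8_t>(B);
      uint8_t Smin = static_cast<uint8_t>(std::min(A, B));
      uint8_t Smax = static_cast<uint8_t>(std::max(A, B));
      // Within the demanded bits each operand is pure sign-extension, so
      // smin is all-ones there iff either operand is negative (OR), and
      // smax is all-ones there iff both operands are negative (AND).
      assert((Smin & DemandedMask) == ((UA | UB) & DemandedMask));
      assert((Smax & DemandedMask) == ((UA & UB) & DemandedMask));
    }
  }
  return 0;
}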