Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2687,12 +2687,42 @@ KnownOne = KnownOne2.byteSwap(); break; } - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: + case ISD::UMIN: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMIN - the result has at least as many leading zero bits as the + // input with the most known leading zeros. + unsigned LeadZero = KnownZero.countLeadingOnes(); + LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadZero); + break; + } case ISD::UMAX: { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMAX - the result has at least as many leading one bits as the + // input with the most known leading ones. + unsigned LeadOne = KnownOne.countLeadingOnes(); + LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownOne |= APInt::getHighBitsSet(BitWidth, LeadOne); + break; + } + case ISD::SMIN: + case ISD::SMAX: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); // If we don't know any bits, early out. 
if (!KnownOne && !KnownZero) break; Index: test/CodeGen/X86/known-bits-vector.ll =================================================================== --- test/CodeGen/X86/known-bits-vector.ll +++ test/CodeGen/X86/known-bits-vector.ll @@ -481,22 +481,14 @@ ; X32: # BB#0: ; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vpsrld $16, %xmm0, %xmm0 -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_umin_shuffle_uitofp: ; X64: # BB#0: ; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vpsrld $16, %xmm0, %xmm0 -; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> ) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> @@ -511,14 +503,12 @@ ; X32: # BB#0: ; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2] -; X32-NEXT: vpsrad $31, %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_umax_shuffle_ashr: ; X64: # BB#0: ; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2] -; X64-NEXT: vpsrad $31, %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> ) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>