Index: llvm/lib/Support/KnownBits.cpp
===================================================================
--- llvm/lib/Support/KnownBits.cpp
+++ llvm/lib/Support/KnownBits.cpp
@@ -420,18 +420,19 @@
   assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) &&
          "Self multiplication knownbits mismatch");
 
-  // Compute a conservative estimate for high known-0 bits.
+  // Compute the high known-0 bits by multiplying the unsigned max of each side.
+  // Conservatively, M active bits * N active bits results in M + N bits in the
+  // result. But if we know a value is a power-of-2 for example, then this
+  // computes one more leading zero.
   // TODO: This could be generalized to number of sign bits (negative numbers).
-  unsigned LHSLeadZ = LHS.countMinLeadingZeros();
-  unsigned RHSLeadZ = RHS.countMinLeadingZeros();
-
-  // If either operand is a power-of-2, the multiply is only shifting bits in
-  // the other operand (there can't be a carry into the M+N bit of the result).
-  // Note: if we know that a value is entirely 0, that should simplify below.
-  bool BonusLZ = LHS.countMaxPopulation() == 1 || RHS.countMaxPopulation() == 1;
-
-  unsigned LeadZ = std::max(LHSLeadZ + RHSLeadZ + BonusLZ, BitWidth) - BitWidth;
-  assert(LeadZ <= BitWidth && "More zeros than bits?");
+  APInt UMaxLHS = LHS.getMaxValue();
+  APInt UMaxRHS = RHS.getMaxValue();
+
+  // For leading zeros in the result to be valid, the unsigned max product must
+  // fit in the bitwidth (it must not overflow).
+  bool HasOverflow;
+  APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow);
+  unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countLeadingZeros();
 
   // The result of the bottom bits of an integer multiply can be
   // inferred by looking at the bottom bits of both operands and
Index: llvm/test/CodeGen/X86/mul128.ll
===================================================================
--- llvm/test/CodeGen/X86/mul128.ll
+++ llvm/test/CodeGen/X86/mul128.ll
@@ -107,15 +107,12 @@
 define void @PR13897() nounwind {
 ; X64-LABEL: PR13897:
 ; X64:       # %bb.0: # %"0x0"
-; X64-NEXT:    movl bbb(%rip), %ecx
-; X64-NEXT:    movabsq $4294967297, %rdx # imm = 0x100000001
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %rdx
-; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movl bbb(%rip), %eax
+; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    shlq $32, %rcx
-; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq %rax, aaa(%rip)
-; X64-NEXT:    movq %rdx, aaa+8(%rip)
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq %rcx, aaa+8(%rip)
+; X64-NEXT:    movq %rcx, aaa(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: PR13897:
Index: llvm/test/Transforms/InstCombine/icmp-mul.ll
===================================================================
--- llvm/test/Transforms/InstCombine/icmp-mul.ll
+++ llvm/test/Transforms/InstCombine/icmp-mul.ll
@@ -858,12 +858,11 @@
   ret i1 %r
 }
 
+; The top 32-bits must be zero.
+
 define i1 @splat_mul_known_lz(i32 %x) {
 ; CHECK-LABEL: @splat_mul_known_lz(
-; CHECK-NEXT:    [[Z:%.*]] = zext i32 [[X:%.*]] to i128
-; CHECK-NEXT:    [[M:%.*]] = mul nuw nsw i128 [[Z]], 18446744078004518913
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i128 [[M]], 79228162514264337593543950336
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   %z = zext i32 %x to i128
   %m = mul i128 %z, 18446744078004518913 ; 0x00000000_00000001_00000001_00000001
@@ -872,6 +871,8 @@
   ret i1 %r
 }
 
+; Negative test - the 33rd bit could be set.
+
 define i1 @splat_mul_unknown_lz(i32 %x) {
 ; CHECK-LABEL: @splat_mul_unknown_lz(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i32 [[X:%.*]] to i128
Index: llvm/test/Transforms/InstCombine/narrow-switch.ll
===================================================================
--- llvm/test/Transforms/InstCombine/narrow-switch.ll
+++ llvm/test/Transforms/InstCombine/narrow-switch.ll
@@ -99,14 +99,14 @@
 ; Make sure to avoid assertion crashes and use the type before
 ; truncation to generate the sub constant expressions that leads
 ; to the recomputed condition.
-; We allow to truncate from i64 to i59 if in 32-bit mode,
+; We allow truncate from i64 to i58 if in 32-bit mode,
 ; because both are illegal.
 
-define void @trunc64to59(i64 %a) {
-; ALL-LABEL: @trunc64to59(
-; CHECK32: switch i59
-; CHECK32-NEXT: i59 0, label %sw.bb1
-; CHECK32-NEXT: i59 18717182647723699, label %sw.bb2
+define void @trunc64to58(i64 %a) {
+; ALL-LABEL: @trunc64to58(
+; CHECK32: switch i58
+; CHECK32-NEXT: i58 0, label %sw.bb1
+; CHECK32-NEXT: i58 18717182647723699, label %sw.bb2
 ; CHECK32-NEXT: ]
 ; CHECK64: switch i64
 ; CHECK64-NEXT: i64 0, label %sw.bb1
Index: llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
@@ -40,21 +40,19 @@
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP4]], <i32 65792, i32 65792, i32 65792, i32 65792>
-; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw nsw <4 x i32> [[TMP5]], <i32 65792, i32 65792, i32 65792, i32 65792>
-; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP4]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
-; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP5]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP7]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[POUT:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP4]], <i32 65793, i32 65793, i32 65793, i32 65793>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw nsw <4 x i32> [[TMP5]], <i32 65793, i32 65793, i32 65793, i32 65793>
+; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP6]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP7]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[POUT:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 4
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* [[TMP15]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP13]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER5]]
@@ -64,11 +62,10 @@
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER5]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[PIN]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP17]] to i32
-; CHECK-NEXT:    [[REASS_MUL:%.*]] = mul nuw nsw i32 [[CONV]], 65792
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[CONV]], -16777216
-; CHECK-NEXT:    [[OR3:%.*]] = add nsw i32 [[OR2]], [[REASS_MUL]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP15]] to i32
+; CHECK-NEXT:    [[OR2:%.*]] = mul nuw nsw i32 [[CONV]], 65793
+; CHECK-NEXT:    [[OR3:%.*]] = or i32 [[OR2]], -16777216
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[POUT]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i32 [[OR3]], i32* [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
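
Reviewer note (illustration only, not part of the patch): the sketch below replays the @splat_mul_known_lz case with plain integers to show why the exact max-product computation proves one more leading zero than the old estimate. The unsigned __int128 type and the countLeadingZeros128 helper stand in for APInt and are assumptions of this example, not LLVM APIs.

// Standalone sketch of the new leading-zero logic in KnownBits::mul, using
// the GCC/Clang unsigned __int128 extension instead of APInt. For
// illustration only; names and helpers here are hypothetical.
#include <cassert>
#include <cstdio>

static unsigned countLeadingZeros128(unsigned __int128 V) {
  // Scan from the top bit down, counting zeros until the first set bit.
  unsigned N = 0;
  for (int Bit = 127; Bit >= 0 && !((V >> Bit) & 1); --Bit)
    ++N;
  return N;
}

int main() {
  // Mirror @splat_mul_known_lz: a zext'd i32 has unsigned max 0xFFFFFFFF.
  unsigned __int128 UMaxLHS = 0xFFFFFFFFu;
  // The splat constant 0x00000000_00000001_00000001_00000001.
  unsigned __int128 UMaxRHS =
      ((unsigned __int128)1 << 64) | ((unsigned __int128)1 << 32) | 1;

  // The product of the unsigned maxima fits in 128 bits (32 + 65 active
  // bits), so its leading zeros are a valid bound for any in-range product.
  unsigned __int128 UMaxResult = UMaxLHS * UMaxRHS;
  unsigned LeadZ = countLeadingZeros128(UMaxResult);

  // UMaxResult is exactly 2^96 - 1, so 32 leading zeros are known. The old
  // code only proved 31 (96 + 63 known leading zeros minus the 128-bit
  // width), which was not enough to fold the icmp against 2^96 above.
  std::printf("known leading zeros: %u\n", LeadZ);
  assert(LeadZ == 32);
  return 0;
}

Compile with clang++ or g++; unsigned __int128 is a compiler extension, not standard C++.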