diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -451,7 +451,11 @@
     }
   }
 
-  Known = KnownBits::mul(Known, Known2);
+  // Query only for X * X. TODO: SelfMultiply can be poison, but not undef.
+  bool SelfMultiply =
+      Op0 == Op1 &&
+      isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+  Known = KnownBits::mul(Known, Known2, SelfMultiply);
 
   // Only make use of no-wrap flags if we failed to compute the sign bit
   // directly.  This matters if the multiplication always overflows, in
diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
--- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
+++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -70,8 +70,7 @@
 ; CHECK-LABEL: @combine_mul_self_demandedbits_vector(
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze <4 x i32> [[X:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 -3, i32 -3, i32 -3, i32 -3>
-; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %1 = freeze <4 x i32> %x
   %2 = mul <4 x i32> %1, %1