diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -113,6 +113,20 @@
   return Ret;
 }
 
+static Constant *sanitizeUndefsTo(Constant *C, Constant *Replacement) {
+  auto Fix = [&](Constant *V) {
+    return !V || !isa<UndefValue>(V) ? V : Replacement;
+  };
+
+  if (auto *CV = dyn_cast<ConstantVector>(C)) {
+    llvm::SmallVector<Constant *, 32> NewOps(CV->getNumOperands());
+    for (const auto &I : llvm::zip(CV->operands(), NewOps))
+      std::get<1>(I) = Fix(cast_or_null<Constant>(std::get<0>(I).get()));
+    return ConstantVector::get(NewOps);
+  }
+  return C;
+}
+
 // If we have some pattern that leaves only some low bits set, and then performs
 // left-shift of those bits, if none of the bits that are left after the final
 // shift are modified by the mask, we can omit the mask.
@@ -159,31 +173,27 @@
         MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
     if (!SumOfShAmts)
       return nullptr; // Did not simplify.
+    // In this pattern SumOfShAmts correlates with the number of low bits
+    // that shall remain in the root value (OuterShift).
+
     Type *Ty = X->getType();
-    unsigned BitWidth = Ty->getScalarSizeInBits();
-    // In this pattern SumOfShAmts correlates with the number of low bits that
-    // shall remain in the root value (OuterShift). If SumOfShAmts is less than
-    // bitwidth, we'll need to also produce a mask to keep SumOfShAmts low bits.
-    // So, does *any* channel need a mask?
-    if (!match(SumOfShAmts, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_UGE,
-                                               APInt(BitWidth, BitWidth)))) {
-      // But for a mask we need to get rid of old masking instruction.
-      if (!Masked->hasOneUse())
-        return nullptr; // Else we can't perform the fold.
-      // The mask must be computed in a type twice as wide to ensure
-      // that no bits are lost if the sum-of-shifts is wider than the base type.
-      Type *ExtendedTy = Ty->getExtendedType();
-      auto *ExtendedSumOfShAmts =
-          ConstantExpr::getZExt(SumOfShAmts, ExtendedTy);
-      // And compute the mask as usual: ~(-1 << (SumOfShAmts))
-      auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
-      auto *ExtendedInvertedMask =
-          ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts);
-      auto *ExtendedMask = ConstantExpr::getNot(ExtendedInvertedMask);
-      NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty);
-    } else
-      NewMask = nullptr; // No mask needed.
-    // All good, we can do this fold.
+
+    // The mask must be computed in a type twice as wide to ensure
+    // that no bits are lost if the sum-of-shifts is wider than the base type.
+    Type *ExtendedTy = Ty->getExtendedType();
+    // If any of these shift amounts are undef, *ext will turn them into zeros,
+    // let's keep undef's by replacing them with some illegal shift amount.
+    SumOfShAmts = sanitizeUndefsTo(
+        SumOfShAmts,
+        ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
+                         ExtendedTy->getScalarType()->getScalarSizeInBits()));
+    auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy);
+    // And compute the mask as usual: ~(-1 << (SumOfShAmts))
+    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
+    auto *ExtendedInvertedMask =
+        ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts);
+    auto *ExtendedMask = ConstantExpr::getNot(ExtendedInvertedMask);
+    NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty);
   } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
              match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
                                  m_Deferred(MaskShAmt)))) {
@@ -193,42 +203,49 @@
     if (!ShAmtsDiff)
       return nullptr; // Did not simplify.
     // In this pattern ShAmtsDiff correlates with the number of high bits that
-    // shall be unset in the root value (OuterShift). If ShAmtsDiff is negative,
-    // we'll need to also produce a mask to unset ShAmtsDiff high bits.
-    // So, does *any* channel need a mask? (is ShiftShAmt u>= MaskShAmt ?)
-    if (!match(ShAmtsDiff, m_NonNegative())) {
-      // This sub-fold (with mask) is invalid for 'ashr' "masking" instruction.
-      if (match(Masked, m_AShr(m_Value(), m_Value())))
-        return nullptr;
-      // For a mask we need to get rid of old masking instruction.
-      if (!Masked->hasOneUse())
-        return nullptr; // Else we can't perform the fold.
-      Type *Ty = X->getType();
-      unsigned BitWidth = Ty->getScalarSizeInBits();
-      // The mask must be computed in a type twice as wide to ensure
-      // that no bits are lost if the sum-of-shifts is wider than the base type.
-      Type *ExtendedTy = Ty->getExtendedType();
-      auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
-          ConstantExpr::getAdd(
-              ConstantExpr::getNeg(ShAmtsDiff),
-              ConstantInt::get(Ty, BitWidth, /*isSigned=*/false)),
-          ExtendedTy);
-      // And compute the mask as usual: (-1 l>> (ShAmtsDiff))
-      auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
-      auto *ExtendedMask =
-          ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
-      NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty);
-    } else
-      NewMask = nullptr; // No mask needed.
-    // All good, we can do this fold.
+    // shall be unset in the root value (OuterShift).
+
+    Type *Ty = X->getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+
+    // The mask must be computed in a type twice as wide to ensure
+    // that no bits are lost if the sum-of-shifts is wider than the base type.
+    Type *ExtendedTy = Ty->getExtendedType();
+    // If any of these shift amounts are undef, *ext will turn them into zeros,
+    // let's keep undef's by replacing them with some illegal shift amount.
+    ShAmtsDiff = sanitizeUndefsTo(
+        ShAmtsDiff,
+        ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), -BitWidth));
+    auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
+        ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(), BitWidth,
+                                              /*isSigned=*/false),
+                             ShAmtsDiff),
+        ExtendedTy);
+    // And compute the mask as usual: (-1 l>> (NumHighBitsToClear))
+    auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
+    auto *ExtendedMask =
+        ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
+    NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty);
   } else
     return nullptr; // Don't know anything about this pattern.
 
-  // No 'NUW'/'NSW'!
-  // We no longer know that we won't shift-out non-0 bits.
+  // Does this mask has any unset bits? If not then we can just not apply it.
+  bool NeedMask = !match(NewMask, m_AllOnes());
+
+  // If we need to apply a mask, there are several more restrictions we have.
+  if (NeedMask) {
+    // The old masking instruction must go away.
+    if (!Masked->hasOneUse())
+      return nullptr;
+    // The original "masking" instruction must not have been`ashr`.
+    if (match(Masked, m_AShr(m_Value(), m_Value())))
+      return nullptr;
+  }
+
+  // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
   auto *NewShift =
       BinaryOperator::Create(OuterShift->getOpcode(), X, ShiftShAmt);
-  if (!NewMask)
+  if (!NeedMask)
     return NewShift;
 
   Builder.Insert(NewShift);
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
@@ -82,7 +82,7 @@
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 0, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
@@ -82,7 +82,7 @@
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 0, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
@@ -62,7 +62,7 @@
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 -1, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]]
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 -1, i32 2147483647>
+; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T4]]
 ;
   %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
@@ -62,7 +62,7 @@
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 -1, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = shl <8 x i32> %x, %nbits