Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -1182,6 +1182,47 @@
       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
       break;
     }
+
+    // Handle cast from vector integer type to scalar or vector integer.
+    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
+    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
+        !I->getType()->isIntOrIntVectorTy())
+      break;
+
+    // Look through a cast from narrow vector elements to wider type.
+    // Examples: v4i32 -> v2i64, v3i8 -> i24
+    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
+    if (BitWidth % SubBitWidth == 0) {
+      // Known bits are automatically intersected across demanded elements of a
+      // vector. So for example, if a bit is computed as known zero, it must be
+      // zero across all demanded elements of the vector.
+      //
+      // For this bitcast, each demanded element of the output is sub-divided
+      // across a set of smaller vector elements in the source vector. To get
+      // the known bits for an entire element of the output, compute the known
+      // bits for each sub-element sequentially. This is done by shifting the
+      // one-set-bit demanded elements parameter across the sub-elements for
+      // consecutive calls to computeKnownBits. We are using the demanded
+      // elements parameter as a mask operator.
+      //
+      // The known bits of each sub-element are then inserted into place
+      // (dependent on endian) to form the full result of known bits.
+      unsigned NumElts = DemandedElts.getBitWidth();
+      unsigned SubScale = BitWidth / SubBitWidth;
+      APInt SubDemandedElts = APInt::getNullValue(NumElts * SubScale);
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (DemandedElts[i])
+          SubDemandedElts.setBit(i * SubScale);
+      }
+
+      KnownBits KnownSrc(SubBitWidth);
+      for (unsigned i = 0; i != SubScale; ++i) {
+        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
+                         Depth + 1, Q);
+        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
+        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
+      }
+    }
     break;
   }
   case Instruction::SExt: {
Index: llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
===================================================================
--- llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -2762,14 +2762,18 @@
   ret <2 x i64> %2
 }
 
+; The shift amount is in range (masked with 31 and high 32-bits are zero),
+; so convert to standard IR - https://llvm.org/PR50123
+
 define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast(
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i32 1
 ; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
-; CHECK-NEXT:    [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]])
-; CHECK-NEXT:    ret <2 x i64> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
   %b = bitcast <2 x i64> %a to <4 x i32>
   %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
@@ -2779,6 +2783,8 @@
   ret <2 x i64> %r
 }
 
+; TODO: This could be recognized as an over-shift.
+
 define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) {
 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift(
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
Index: llvm/test/Transforms/InstSimplify/shift-knownbits.ll
===================================================================
--- llvm/test/Transforms/InstSimplify/shift-knownbits.ll
+++ llvm/test/Transforms/InstSimplify/shift-knownbits.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s
+; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN
+; RUN: opt < %s -instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN
 
 ; If any bits of the shift amount are known to make it exceed or equal
 ; the number of bits in the type, the shift causes undefined behavior.
@@ -223,12 +224,11 @@
   ret i8 %ex
 }
 
+; The shift amount is 0 in both the high and low bytes. The middle byte doesn't matter.
+
 define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
 ; CHECK-LABEL: @bitcast_noshift_scalar(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 3>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; CHECK-NEXT:    ret i24 [[V2:%.*]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 3>
@@ -237,12 +237,17 @@
   ret i24 %r
 }
 
+; The shift amount is 0 on low byte of big-endian and unknown on little-endian.
+
 define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
-; CHECK-LABEL: @bitcast_noshift_scalar_bigend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
+; BIGENDIAN-NEXT:    ret i24 [[V2:%.*]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
+; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
+; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
+; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
+; LITTLEENDIAN-NEXT:    ret i24 [[R]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 0, i32 1, i32 3>
@@ -251,12 +256,17 @@
   ret i24 %r
 }
 
+; The shift amount is 0 on low byte of little-endian and unknown on big-endian.
+
 define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
-; CHECK-LABEL: @bitcast_noshift_scalar_littleend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
+; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
+; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
+; BIGENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
+; BIGENDIAN-NEXT:    ret i24 [[R]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
+; LITTLEENDIAN-NEXT:    ret i24 [[V2:%.*]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 2>
@@ -265,12 +275,12 @@
   ret i24 %r
 }
 
+; The shift amount is known 24 on little-endian and known 24<<16 on big-endian
+; across all vector elements, so it's an overshift either way.
+
 define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 ; CHECK-LABEL: @bitcast_overshift_vector(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; CHECK-NEXT:    ret <3 x i24> poison
 ;
   %c = insertelement <9 x i8> poison, i8 24, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
@@ -279,12 +289,18 @@
   ret <3 x i24> %r
 }
 
+; The shift amount is known 23 on little-endian and known 23<<16 on big-endian
+; across all vector elements, so it's an overshift for big-endian.
+
 define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
-; CHECK-LABEL: @bitcast_overshift_vector_bigend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend(
+; BIGENDIAN-NEXT:    ret <3 x i24> poison
+;
+; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend(
+; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
+; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
+; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
+; LITTLEENDIAN-NEXT:    ret <3 x i24> [[R]]
 ;
   %c = insertelement <9 x i8> poison, i8 23, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
@@ -293,12 +309,18 @@
   ret <3 x i24> %r
 }
 
+; The shift amount is known 23 on big-endian and known 23<<16 on little-endian
+; across all vector elements, so it's an overshift for little-endian.
+
 define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) {
-; CHECK-LABEL: @bitcast_overshift_vector_littleend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend(
+; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
+; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
+; BIGENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
+; BIGENDIAN-NEXT:    ret <3 x i24> [[R]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend(
+; LITTLEENDIAN-NEXT:    ret <3 x i24> poison
 ;
   %c = insertelement <9 x i8> poison, i8 23, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
@@ -307,6 +329,8 @@
   ret <3 x i24> %r
 }
 
+; Negative test - the shift amount is known 24 or 24<<16 on only 2 out of 3 elements.
+
 define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 ; CHECK-LABEL: @bitcast_partial_overshift_vector(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -321,6 +345,8 @@
   ret <3 x i24> %r
 }
 
+; Negative test - don't know how to look through a cast with non-integer type (but we could handle this...).
+
 define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: @bitcast_noshift_vector_wrong_type(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>