diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2289,6 +2289,19 @@
 
   unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
   unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
+
+  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
+  //
+  //
+  Value *X, *Y;
+  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
+      X->getType()->isVectorTy() &&
+      cast<FixedVectorType>(X->getType())->getNumElements() == VWidth &&
+      X->getType() == Y->getType()) {
+    Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask());
+    return new BitCastInst(V, SVI.getType());
+  }
+
   ArrayRef<int> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
 
@@ -2298,7 +2311,6 @@
   // TODO: This could be extended to allow length-changing shuffles.
   //       The transform might also be obsoleted if we allowed canonicalization
   //       of bitcasted shuffles.
-  Value *X;
   if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
       X->getType()->isVectorTy() && VWidth == LHSWidth) {
     // Try to create a scaled mask constant.
diff --git a/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll b/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
--- a/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
@@ -5,11 +5,9 @@
 ; CHECK-LABEL: @vtrn1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[I:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1
 ; CHECK-NEXT:    ret void
 ;
 {
diff --git a/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll b/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
--- a/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
@@ -4,13 +4,10 @@
 define void @vtrn2([8 x i8]* nocapture %result, i32 %i, i32 %j, i32 %k) {
 ; CHECK-LABEL: @vtrn2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[J:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <2 x float>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[J:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry: