diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2289,6 +2289,19 @@ unsigned VWidth = cast(SVI.getType())->getNumElements(); unsigned LHSWidth = cast(LHS->getType())->getNumElements(); + + // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) + // Needs equal element sizes and a one-use cast (no added instruction). + Value *X, *Y; + if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && + X->getType()->isVectorTy() && X->getType() == Y->getType() && + X->getType()->getScalarSizeInBits() == + SVI.getType()->getScalarSizeInBits() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask()); + return new BitCastInst(V, SVI.getType()); + } + ArrayRef Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); @@ -2298,7 +2311,6 @@ // TODO: This could be extended to allow length-changing shuffles. // The transform might also be obsoleted if we allowed canonicalization // of bitcasted shuffles. - Value *X; if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && X->getType()->isVectorTy() && VWidth == LHSWidth) { // Try to create a scaled mask constant. 
diff --git a/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll b/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll --- a/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll +++ b/llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll @@ -5,11 +5,9 @@ ; CHECK-LABEL: @vtrn1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[I:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1 ; CHECK-NEXT: ret void ; { diff --git a/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll b/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll --- a/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll +++ b/llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll @@ -4,13 +4,10 @@ define void @vtrn2([8 x i8]* nocapture %result, i32 %i, i32 %j, i32 %k) { ; CHECK-LABEL: @vtrn2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[J:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <2 x float> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = 
insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[J:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1 ; CHECK-NEXT: ret void ; entry: