diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2268,12 +2268,8 @@ SmallVector Mask; Shuf.getShuffleMask(Mask); - // The shuffle must not change vector sizes. - // TODO: This restriction could be removed if the insert has only one use - // (because the transform would require a new length-changing shuffle). int NumElts = Mask.size(); - if (NumElts != (int)(cast(V0->getType())->getNumElements())) - return nullptr; + int InpNumElts = cast(V0->getType())->getNumElements(); // This is a specialization of a fold in SimplifyDemandedVectorElts. We may // not be able to handle it there if the insertelement has >1 use. @@ -2290,11 +2286,16 @@ if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { // Offset the index constant by the vector width because we are checking for // accesses to the 2nd vector input of the shuffle. - IdxC += NumElts; + IdxC += InpNumElts; // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask if (!is_contained(Mask, (int)IdxC)) return IC.replaceOperand(Shuf, 1, X); } + // For the rest of the transform, the shuffle must not change vector sizes. + // TODO: This restriction could be removed if the insert has only one use + // (because the transform would require a new length-changing shuffle). + if (NumElts != InpNumElts) + return nullptr; // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -696,7 +696,7 @@ ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op0( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 @@ -709,7 +709,7 @@ ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op1( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3