Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1531,6 +1531,71 @@ return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask)); } +/// Try to replace a shuffle with an insertelement. +static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) { + Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); + SmallVector Mask = Shuf.getShuffleMask(); + + // The shuffle must not change vector sizes. + // TODO: This restriction could be removed if the insert has only one use + // (because the transform would require a new length-changing shuffle). + int NumElts = Mask.size(); + if (NumElts != (int)(V0->getType()->getVectorNumElements())) + return nullptr; + + // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' + Value *Scalar; + ConstantInt *IndexC; + auto isShufflingScalarIntoOp1 = [&]() { + // We need an insertelement with a constant index. + if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar), + m_ConstantInt(IndexC)))) + return false; + + // Test the shuffle mask to see if it splices the inserted scalar into the + // operand 1 vector of the shuffle. + int NewInsIndex = -1; + for (int i = 0; i != NumElts; ++i) { + // Ignore undef mask elements. + if (Mask[i] == -1) + continue; + + // The shuffle takes elements of operand 1 without lane changes. + if (Mask[i] == NumElts + i) + continue; + + // The shuffle must choose the inserted scalar exactly once. + if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) + return false; + + // The shuffle is placing the inserted scalar into element i. + NewInsIndex = i; + } + + assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); + + // Index is updated to the potentially translated insertion lane. + IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); + return true; + }; + + // If the shuffle is unnecessary, insert the scalar operand directly into + // operand 1 of the shuffle. Example: + // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 + if (isShufflingScalarIntoOp1()) + return InsertElementInst::Create(V1, Scalar, IndexC); + + // Try again after commuting shuffle. Example: + // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> + // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 + std::swap(V0, V1); + ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + if (isShufflingScalarIntoOp1()) + return InsertElementInst::Create(V1, Scalar, IndexC); + + return nullptr; +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1556,6 +1621,11 @@ if (Instruction *I = foldIdentityExtractShuffle(SVI)) return I; + // This transform has the potential to lose undef knowledge, so it is + // intentionally placed after SimplifyDemandedVectorElts(). + if (Instruction *I = foldShuffleWithInsert(SVI)) + return I; + SmallVector Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); unsigned LHSWidth = LHS->getType()->getVectorNumElements(); Index: test/Transforms/InstCombine/insert-extract-shuffle.ll =================================================================== --- test/Transforms/InstCombine/insert-extract-shuffle.ll +++ test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -303,12 +303,11 @@ ret <4 x float> %v3 } -; TODO: Simplest case - insert scalar into undef, then shuffle that value in place into another vector. +; Simplest case - insert scalar into undef, then shuffle that value in place into another vector. define <4 x float> @insert_shuffle(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_shuffle( -; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -316,12 +315,11 @@ ret <4 x float> %r } -; TODO: Insert scalar into some element of a dummy vector, then move it to a different element in another vector. +; Insert scalar into some element of a dummy vector, then move it to a different element in another vector. define <4 x float> @insert_shuffle_translate(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_shuffle_translate( -; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -329,12 +327,11 @@ ret <4 x float> %r } -; TODO: The vector operand of the insert is irrelevant. +; The vector operand of the insert is irrelevant. define <4 x float> @insert_not_undef_shuffle_translate(float %x, <4 x float> %y, <4 x float> %q) { ; CHECK-LABEL: @insert_not_undef_shuffle_translate( -; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> %q, float %x, i32 3 @@ -342,12 +339,11 @@ ret <4 x float> %r } -; TODO: The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements. +; The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements. define <4 x float> @insert_not_undef_shuffle_translate_commute(float %x, <4 x float> %y, <4 x float> %q) { ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute( -; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> %q, float %x, i32 2 @@ -355,13 +351,12 @@ ret <4 x float> %r } -; TODO: Both shuffle operands may be inserts - choose the correct side. +; Both shuffle operands may be inserts - choose the correct side. define <4 x float> @insert_insert_shuffle_translate(float %x1, float %x2, <4 x float> %q) { ; CHECK-LABEL: @insert_insert_shuffle_translate( -; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> undef, float [[X1:%.*]], i32 0 ; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X2:%.*]], i32 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV2]], float [[X1:%.*]], i32 1 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv1 = insertelement <4 x float> %q, float %x1, i32 0 @@ -370,13 +365,12 @@ ret <4 x float> %r } -; TODO: Both shuffle operands may be inserts - choose the correct side. +; Both shuffle operands may be inserts - choose the correct side. define <4 x float> @insert_insert_shuffle_translate_commute(float %x1, float %x2, <4 x float> %q) { ; CHECK-LABEL: @insert_insert_shuffle_translate_commute( ; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0 -; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> undef, float [[X2:%.*]], i32 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV1]], float [[X2:%.*]], i32 1 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv1 = insertelement <4 x float> %q, float %x1, i32 0 @@ -385,6 +379,9 @@ ret <4 x float> %r } +; Negative test - this only works if the shuffle is choosing exactly 1 element from 1 of the inputs. +; TODO: But this could be a special-case because we're inserting into the same base vector. + define <4 x float> @insert_insert_shuffle_translate_wrong_mask(float %x1, float %x2, <4 x float> %q) { ; CHECK-LABEL: @insert_insert_shuffle_translate_wrong_mask( ; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0 @@ -398,7 +395,7 @@ ret <4 x float> %r } -; TODO: The insert may have other uses. +; The insert may have other uses. declare void @use(<4 x float>) @@ -406,7 +403,7 @@ ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_uses( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X:%.*]], i32 2 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X]], i32 0 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> %q, float %x, i32 2 @@ -415,6 +412,8 @@ ret <4 x float> %r } +; Negative test - size-changing shuffle. + define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) { ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2