Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1135,6 +1135,47 @@
   return true;
 }
 
+/// If a shuffle operand is an insertelement into a lane that is not accessed
+/// by the shuffle, then bypass the insertelement and use the insert's source
+/// operand vector directly. This is a specialization of a demanded elements
+/// fold that does not require hasOneUse().
+///
+/// \param ShufOp One of the shuffle's two vector operands.
+/// \param IsRHS  True when \p ShufOp is the shuffle's second operand, whose
+///               lanes are numbered after the first operand's in the mask.
+/// \param Mask   The shuffle's mask element values.
+/// \returns The insertelement's source vector if the inserted lane is not
+///          referenced by \p Mask; otherwise nullptr.
+static Value *replaceInsertEltShuffleOperand(Value *ShufOp, bool IsRHS,
+                                             const SmallVectorImpl<int> &Mask) {
+  auto *InsElt = dyn_cast<InsertElementInst>(ShufOp);
+  if (!InsElt)
+    return nullptr;
+
+  auto *InsertIndexOp = dyn_cast<ConstantInt>(InsElt->getOperand(2));
+  if (!InsertIndexOp)
+    return nullptr;
+
+  // Only handle in-range insert indexes. Testing the APInt first keeps
+  // getZExtValue() from asserting on constants that need more than 64 bits
+  // and makes the narrowing to 'int' below lossless.
+  unsigned NumElts = ShufOp->getType()->getVectorNumElements();
+  if (InsertIndexOp->getValue().uge(NumElts))
+    return nullptr;
+
+  // Translate the insert index to the shuffle's mask element values.
+  int InsertIndex = InsertIndexOp->getZExtValue();
+  if (IsRHS)
+    InsertIndex += NumElts;
+
+  // If the inserted lane is not in the shuffle mask, then the insertelement
+  // does not affect the result of the shuffle.
+  if (none_of(Mask, [&](int Elt) { return Elt == InsertIndex; }))
+    return InsElt->getOperand(0);
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1158,6 +1199,17 @@
     MadeChange = true;
   }
 
+  // shuffle (insertelement V1, S, C), V2, M --> shuffle V1, V2, M
+  if (Value *NewOp = replaceInsertEltShuffleOperand(LHS, false, Mask)) {
+    SVI.setOperand(0, NewOp);
+    return &SVI;
+  }
+  // shuffle V1, (insertelement V2, S, C), M --> shuffle V1, V2, M
+  if (Value *NewOp = replaceInsertEltShuffleOperand(RHS, true, Mask)) {
+    SVI.setOperand(1, NewOp);
+    return &SVI;
+  }
+
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();
 
   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -146,9 +146,7 @@
 
 define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -161,9 +159,7 @@
 
 define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand_commute(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2