Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -566,6 +566,86 @@ return nullptr; } +static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { + Constant *Mask = Shuf.getMask(); + unsigned MaskSize = Mask->getType()->getVectorNumElements(); + unsigned VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements(); + + // A vector select does not change the size of the operands. + if (MaskSize != VecSize) + return false; + + for (unsigned i = 0; i != MaskSize; ++i) { + Constant *Elt = Mask->getAggregateElement(i); + if (isa(Elt)) + continue; + + uint64_t EltVal; + if (!match(Elt, m_ConstantInt(EltVal))) + return false; + + // The mask must choose a vector element from one of the source operands + // without crossing vector lanes. + if (EltVal != i && EltVal != i + VecSize) + return false; + } + return true; +} + +/// insertelt (shufflevector X, CVec, Mask), C, CIndex --> +/// shufflevector X, CVec', Mask' +static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { + // Bail out if the shuffle has more than one use. In that case, we'd be + // replacing the insertelt with a shuffle, and that's not a clear win. + auto *Shuf = dyn_cast(InsElt.getOperand(0)); + if (!Shuf || !Shuf->hasOneUse()) + return nullptr; + + // The shuffle must have a constant vector operand. The insertelt must have a + // constant scalar being inserted at a constant position in the vector. + Constant *ShufConstVec, *InsEltScalar; + uint64_t InsEltIndex; + if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || + !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || + !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) + return nullptr; + + // Adding an element to an arbitrary shuffle could be expensive, but a shuffle + // that selects elements from vectors without changing lanes is assumed cheap. + // If we're just adding a constant into that shuffle, it will still be cheap. + if (!isShuffleEquivalentToSelect(*Shuf)) + return nullptr; + + // From the above 'select' check, we know that the mask has the same number of + // elements as the vector input operands. We also know that each constant + // input element can not be used more than once by the shuffle. Therefore, + // replace the constant in the shuffle's constant vector with the insertelt + // constant. Replace the constant in the shuffle's mask vector with the + // insertelt index plus the length of the vector (because the constant vector + // operand of a shuffle is always the 2nd operand). + Constant *Mask = Shuf->getMask(); + unsigned NumElts = Mask->getType()->getVectorNumElements(); + SmallVector NewShufElts(NumElts); + SmallVector NewMaskElts(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + if (i == InsEltIndex) { + NewShufElts[i] = InsEltScalar; + Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); + NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); + } else { + // Copy over the existing values. + NewShufElts[i] = ShufConstVec->getAggregateElement(i); + NewMaskElts[i] = Mask->getAggregateElement(i); + } + } + + // Create new operands for a shuffle that includes the constant of the + // original insertelt. The old shuffle will be dead now. + Constant *NewShufVec = ConstantVector::get(NewShufElts); + Constant *NewMask = ConstantVector::get(NewMaskElts); + return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask); +} + Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -625,6 +705,9 @@ return &IE; } + if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) + return Shuf; + return nullptr; } Index: test/Transforms/InstCombine/insert-const-shuf.ll =================================================================== --- test/Transforms/InstCombine/insert-const-shuf.ll +++ test/Transforms/InstCombine/insert-const-shuf.ll @@ -1,12 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -instcombine %s | FileCheck %s -; TODO: Eliminate the insertelement. +; Eliminate the insertelement. define <4 x float> @PR29126(<4 x float> %x) { ; CHECK-LABEL: @PR29126( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.200000e+01, i32 3 +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> [[INS]] ; %shuf = shufflevector <4 x float> %x, <4 x float> , <4 x i32> @@ -14,13 +13,11 @@ ret <4 x float> %ins } -; TODO: A chain of inserts should collapse. +; A chain of inserts should collapse. define <4 x float> @twoInserts(<4 x float> %x) { ; CHECK-LABEL: @twoInserts( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> -; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float 4.200000e+01, i32 2 -; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 1.100000e+01, i32 3 +; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> [[INS2]] ; %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32>