Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -566,6 +566,56 @@ return nullptr; } +/// insertelt (shufflevector X, CVec, Mask), C, CIndex --> +/// shufflevector X, CVec', Mask' +static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { + // Bail out if the shuffle has more than one use. In that case, we'd be + // replacing the insertelt with a shuffle, and that's not a clear win. + auto *Shuf = dyn_cast(InsElt.getOperand(0)); + if (!Shuf || !Shuf->hasOneUse()) + return nullptr; + + // The shuffle must have a constant vector operand. The insertelt must have a + // constant scalar being inserted at a constant position in the vector. + Constant *ShufConstVec, *InsEltScalar; + uint64_t InsEltIndex; + if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || + !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || + !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) + return nullptr; + + // TODO: This restriction could be loosened to handle a shuffle with a mask + // that has a shorter length than its vector operands. + Constant *Mask = Shuf->getMask(); + unsigned NumElts = Mask->getType()->getVectorNumElements(); + if (ShufConstVec->getType()->getVectorNumElements() != NumElts) + return nullptr; + + // Replace the constant in the shuffle's constant vector with the insertelt + // constant. Replace the constant in the shuffle's mask vector with the + // insertelt index plus the length of the vector (because the constant vector + // operand of the shuffle is always the 2nd operand). + SmallVector NewShufElts(NumElts); + SmallVector NewMaskElts(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + if (i == InsEltIndex) { + NewShufElts[i] = InsEltScalar; + Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); + NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); + } else { + // Copy over the existing values. + NewShufElts[i] = ShufConstVec->getAggregateElement(i); + NewMaskElts[i] = Mask->getAggregateElement(i); + } + } + + // Create new operands for a shuffle that includes the constant of the + // original insertelt. The old shuffle will be dead now. + Constant *NewShufVec = ConstantVector::get(NewShufElts); + Constant *NewMask = ConstantVector::get(NewMaskElts); + return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask); +} + Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -625,6 +675,9 @@ return &IE; } + if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) + return Shuf; + return nullptr; } Index: test/Transforms/InstCombine/insert-const-shuf.ll =================================================================== --- test/Transforms/InstCombine/insert-const-shuf.ll +++ test/Transforms/InstCombine/insert-const-shuf.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine %s | FileCheck %s + +; Eliminate the insertelement. + +define <4 x float> @PR29126(<4 x float> %x) { +; CHECK-LABEL: @PR29126( +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> +; CHECK-NEXT: ret <4 x float> [[INS]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> , <4 x i32> + %ins = insertelement <4 x float> %shuf, float 42.0, i32 3 + ret <4 x float> %ins +} + +define <4 x float> @twoInserts(<4 x float> %x) { +; CHECK-LABEL: @twoInserts( +; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> +; CHECK-NEXT: ret <4 x float> [[INS2]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> + %ins1 = insertelement <4 x float> %shuf, float 42.0, i32 2 + %ins2 = insertelement <4 x float> %ins1, float 11.0, i32 3 + ret <4 x float> %ins2 +} + +; Don't transform insert to shuffle if the original shuffle is not removed. +; TODO: Ease the use restriction if the insert scalar would simplify the shuffle to a full vector constant? + +define <3 x float> @twoShufUses(<3 x float> %x) { +; CHECK-LABEL: @twoShufUses( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> %x, <3 x float> , <3 x i32> +; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x float> [[SHUF]], float 4.200000e+01, i2 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[SHUF]], [[INS]] +; CHECK-NEXT: ret <3 x float> [[ADD]] +; + %shuf = shufflevector <3 x float> %x, <3 x float> , <3 x i32> + %ins = insertelement <3 x float> %shuf, float 42.0, i2 1 + %add = fadd <3 x float> %shuf, %ins + ret <3 x float> %add +} + +; The inserted scalar constant index is out-of-bounds for the shuffle vector constant. + +define <5 x i8> @longerMask(<3 x i8> %x) { +; CHECK-LABEL: @longerMask( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> %x, <3 x i8> , <5 x i32> +; CHECK-NEXT: [[INS:%.*]] = insertelement <5 x i8> [[SHUF]], i8 42, i17 4 +; CHECK-NEXT: ret <5 x i8> [[INS]] +; + %shuf = shufflevector <3 x i8> %x, <3 x i8> , <5 x i32> + %ins = insertelement <5 x i8> %shuf, i8 42, i17 4 + ret <5 x i8> %ins +} + +; TODO: The inserted constant could get folded into the shuffle vector constant. + +define <3 x i8> @shorterMask(<5 x i8> %x) { +; CHECK-LABEL: @shorterMask( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> %x, <5 x i8> undef, <3 x i32> +; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> [[SHUF]], i8 42, i21 0 +; CHECK-NEXT: ret <3 x i8> [[INS]] +; + %shuf = shufflevector <5 x i8> %x, <5 x i8> , <3 x i32> + %ins = insertelement <3 x i8> %shuf, i8 42, i21 0 + ret <3 x i8> %ins +} +