Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -585,58 +585,104 @@ return true; } -/// insertelt (shufflevector X, CVec, Mask), C, CIndex --> -/// shufflevector X, CVec', Mask' +/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex +/// --> shufflevector X, CVec', Mask' static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { - // Bail out if the shuffle has more than one use. In that case, we'd be + auto *Inst = dyn_cast(InsElt.getOperand(0)); + // Bail out if the parent has more than one use. In that case, we'd be // replacing the insertelt with a shuffle, and that's not a clear win. - auto *Shuf = dyn_cast(InsElt.getOperand(0)); - if (!Shuf || !Shuf->hasOneUse()) + if (!Inst || !Inst->hasOneUse()) return nullptr; + Value *X = nullptr; + Constant *NewShufVec = nullptr; + Constant *NewMask = nullptr; + if (auto *Shuf = dyn_cast(InsElt.getOperand(0))) { + // The shuffle must have a constant vector operand. The insertelt must have + // a constant scalar being inserted at a constant position in the vector. + Constant *ShufConstVec, *InsEltScalar; + uint64_t InsEltIndex; + if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || + !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || + !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) + return nullptr; - // The shuffle must have a constant vector operand. The insertelt must have a - // constant scalar being inserted at a constant position in the vector. - Constant *ShufConstVec, *InsEltScalar; - uint64_t InsEltIndex; - if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || - !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || - !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) - return nullptr; + // Adding an element to an arbitrary shuffle could be expensive, but a + // shuffle that selects elements from vectors without crossing lanes is + // assumed cheap. + // If we're just adding a constant into that shuffle, it will still be + // cheap. + if (!isShuffleEquivalentToSelect(*Shuf)) + return nullptr; - // Adding an element to an arbitrary shuffle could be expensive, but a shuffle - // that selects elements from vectors without crossing lanes is assumed cheap. - // If we're just adding a constant into that shuffle, it will still be cheap. - if (!isShuffleEquivalentToSelect(*Shuf)) - return nullptr; + // From the above 'select' check, we know that the mask has the same number + // of elements as the vector input operands. We also know that each constant + // input element is used in its lane and can not be used more than once by + // the shuffle. Therefore, replace the constant in the shuffle's constant + // vector with the insertelt constant. Replace the constant in the shuffle's + // mask vector with the insertelt index plus the length of the vector + // (because the constant vector operand of a shuffle is always the 2nd + // operand). + Constant *Mask = Shuf->getMask(); + unsigned NumElts = Mask->getType()->getVectorNumElements(); + SmallVector NewShufElts(NumElts); + SmallVector NewMaskElts(NumElts); + for (unsigned I = 0; I != NumElts; ++I) { + if (I == InsEltIndex) { + NewShufElts[I] = InsEltScalar; + Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); + NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); + } else { + // Copy over the existing values. + NewShufElts[I] = ShufConstVec->getAggregateElement(I); + NewMaskElts[I] = Mask->getAggregateElement(I); + } + } - // From the above 'select' check, we know that the mask has the same number of - // elements as the vector input operands. We also know that each constant - // input element is used in its lane and can not be used more than once by the - // shuffle. Therefore, replace the constant in the shuffle's constant vector - // with the insertelt constant. Replace the constant in the shuffle's mask - // vector with the insertelt index plus the length of the vector (because the - // constant vector operand of a shuffle is always the 2nd operand). - Constant *Mask = Shuf->getMask(); - unsigned NumElts = Mask->getType()->getVectorNumElements(); - SmallVector NewShufElts(NumElts); - SmallVector NewMaskElts(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - if (i == InsEltIndex) { - NewShufElts[i] = InsEltScalar; - Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); - NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); - } else { - // Copy over the existing values. - NewShufElts[i] = ShufConstVec->getAggregateElement(i); - NewMaskElts[i] = Mask->getAggregateElement(i); + // Create new operands for a shuffle that includes the constant of the + // original insertelt. The old shuffle will be dead now. + X = Shuf->getOperand(0); + NewShufVec = ConstantVector::get(NewShufElts); + NewMask = ConstantVector::get(NewMaskElts); + } else if (auto *IEI = dyn_cast(Inst)) { + // Transform sequences of insertelements ops with constant data/indexes into + // a single shuffle op. + unsigned NumElts = InsElt.getType()->getNumElements(); + + uint64_t InsertIdx[2]; + Constant *Val[2]; + if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || + !match(InsElt.getOperand(1), m_Constant(Val[0])) || + !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || + !match(IEI->getOperand(1), m_Constant(Val[1]))) + return nullptr; + SmallVector Values(NumElts); + SmallVector Mask(NumElts); + auto ValI = std::begin(Val); + for (uint64_t I : InsertIdx) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = *ValI; + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), + NumElts + I); + } + ++ValI; } + for (unsigned I = 0; I < NumElts; ++I) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = UndefValue::get(InsElt.getType()->getElementType()); + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I); + } + } + // Create new operands for a shuffle that includes the constant of the + // original insertelt. + X = IEI->getOperand(0); + NewShufVec = ConstantVector::get(Values); + NewMask = ConstantVector::get(Mask); } - - // Create new operands for a shuffle that includes the constant of the - // original insertelt. The old shuffle will be dead now. - Constant *NewShufVec = ConstantVector::get(NewShufElts); - Constant *NewMask = ConstantVector::get(NewMaskElts); - return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask); + if (X && NewShufVec && NewMask) + return new ShuffleVectorInst(X, NewShufVec, NewMask); + return nullptr; } Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Index: test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- test/Transforms/InstCombine/vec_demanded_elts.ll +++ test/Transforms/InstCombine/vec_demanded_elts.ll @@ -182,10 +182,11 @@ define <2 x float> @test_fptrunc(double %f) { ; CHECK-LABEL: @test_fptrunc( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float> -; CHECK-NEXT: ret <2 x float> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> undef, double %f, i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> , <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <4 x double> [[TMP12]] to <4 x float> +; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: ret <2 x float> [[RET]] ; %tmp9 = insertelement <4 x double> undef, double %f, i32 0 %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1 @@ -198,10 +199,11 @@ define <2 x double> @test_fpext(float %f) { ; CHECK-LABEL: @test_fpext( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> -; CHECK-NEXT: ret <2 x double> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fpext <4 x float> [[TMP12]] to <4 x double> +; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> undef, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[RET]] ; %tmp9 = insertelement <4 x float> undef, float %f, i32 0 %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1 @@ -215,7 +217,7 @@ define <4 x float> @test_select(float %f, float %g) { ; CHECK-LABEL: @test_select( ; CHECK-NEXT: [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[RET:%.*]] = select <4 x i1> , <4 x float> [[A3]], <4 x float> ; CHECK-NEXT: ret <4 x float> [[RET]] ; Index: test/Transforms/InstCombine/vector_insertelt_shuffle.ll =================================================================== --- test/Transforms/InstCombine/vector_insertelt_shuffle.ll +++ test/Transforms/InstCombine/vector_insertelt_shuffle.ll @@ -7,10 +7,9 @@ ret<4 x float> %ins2 } -; FIXME: insertelements should fold to shuffle +; insertelements should fold to shuffle ; CHECK-LABEL: @foo -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1 -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2 +; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> % define<4 x float> @bar(<4 x float> %x, float %a) { @@ -45,12 +44,11 @@ ret<4 x float> %ins6 } -; FIXME: insertelements should fold to shuffle +; insertelements should fold to shuffle ; CHECK-LABEL: @bazz ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 % -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1 -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2 +; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> , <4 x i32> ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 % ; CHECK-NEXT: ret <4 x float> %