Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -702,44 +702,64 @@ ConstantVector::get(NewMaskElts)); } else if (auto *IEI = dyn_cast(Inst)) { // Transform sequences of insertelements ops with constant data/indexes into - // a single shuffle op. + // a single shuffle op or a single insertelement. unsigned NumElts = InsElt.getType()->getNumElements(); - uint64_t InsertIdx[2]; Constant *Val[2]; - if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || - !match(InsElt.getOperand(1), m_Constant(Val[0])) || - !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || - !match(IEI->getOperand(1), m_Constant(Val[1]))) + if (!match(InsElt.getOperand(1), m_Constant(Val[0])) || + !match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || + !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1]))) return nullptr; - SmallVector Values(NumElts); - SmallVector Mask(NumElts); - auto ValI = std::begin(Val); - // Generate new constant vector and mask. - // We have 2 values/masks from the insertelements instructions. Insert them - // into new value/mask vectors. - for (uint64_t I : InsertIdx) { - if (!Values[I]) { - assert(!Mask[I]); - Values[I] = *ValI; - Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), - NumElts + I); - } - ++ValI; - } - // Remaining values are filled with 'undef' values. - for (unsigned I = 0; I < NumElts; ++I) { - if (!Values[I]) { - assert(!Mask[I]); - Values[I] = UndefValue::get(InsElt.getType()->getElementType()); - Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I); + + if (match(IEI->getOperand(1), m_Constant(Val[1]))) { + // inselt1 (inselt2 X, Val1C, Idx1C), Val0C, Idx0C --> shuffle + SmallVector Values(NumElts); + SmallVector Mask(NumElts); + auto ValI = std::begin(Val); + // Generate new constant vector and mask. + // We have 2 values/masks from the insertelements instructions. Insert + // them into new value/mask vectors. + for (uint64_t I : InsertIdx) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = *ValI; + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), + NumElts + I); + } + ++ValI; } + // Remaining values are filled with 'undef' values. + for (unsigned I = 0; I < NumElts; ++I) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = UndefValue::get(InsElt.getType()->getElementType()); + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I); + } + } + // Create new operands for a shuffle that includes the constant of the + // original insertelt. + return new ShuffleVectorInst(IEI->getOperand(0), + ConstantVector::get(Values), + ConstantVector::get(Mask)); + } + if (match(IEI->getOperand(0), m_Constant(Val[1])) && + InsertIdx[0] != InsertIdx[1]) { + // If we're inserting a scalar constant into a vector of constants with + // one variable element, eliminate the 2nd insert by putting the scalar + // constant directly into the vector constant: + // ins0 (ins1 Val1C, X, Idx1C), Val0C, Idx0C --> ins Val1C', X, Idx1C + SmallVector NewVectorC(NumElts); + for (unsigned I = 0; I != NumElts; ++I) { + if (I == InsertIdx[0]) + // Replace the old element with the constant from the 2nd insert. + NewVectorC[I] = Val[0]; + else + // Copy over the existing values. + NewVectorC[I] = Val[1]->getAggregateElement(I); + } + return InsertElementInst::Create(ConstantVector::get(NewVectorC), + IEI->getOperand(1), IEI->getOperand(2)); } - // Create new operands for a shuffle that includes the constant of the - // original insertelt. - return new ShuffleVectorInst(IEI->getOperand(0), - ConstantVector::get(Values), - ConstantVector::get(Mask)); } return nullptr; } Index: test/Transforms/InstCombine/bitcast-bigendian.ll =================================================================== --- test/Transforms/InstCombine/bitcast-bigendian.ll +++ test/Transforms/InstCombine/bitcast-bigendian.ll @@ -81,9 +81,8 @@ define <2 x float> @test6(float %A){ ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 4.200000e+01, i32 1 -; CHECK-NEXT: ret <2 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> , float %A, i32 0 +; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %tmp23 = bitcast float %A to i32 %tmp24 = zext i32 %tmp23 to i64 Index: test/Transforms/InstCombine/insert-extract-shuffle.ll =================================================================== --- test/Transforms/InstCombine/insert-extract-shuffle.ll +++ test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -86,11 +86,8 @@ define <8 x i16> @pr26015(<4 x i16> %t0) { ; CHECK-LABEL: @pr26015( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> %t0, i32 2 -; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> , i16 [[TMP2]], i32 3 -; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6 -; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; %t1 = extractelement <4 x i16> %t0, i32 2 @@ -110,8 +107,7 @@ ; CHECK-NEXT: br i1 %b, label %if, label %end ; CHECK: if: ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> , i16 [[T1]], i32 3 -; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6 +; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> , i16 [[T1]], i32 3 ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; CHECK: end: Index: test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- test/Transforms/InstCombine/vec_demanded_elts.ll +++ test/Transforms/InstCombine/vec_demanded_elts.ll @@ -182,10 +182,9 @@ define <2 x float> @test_fptrunc(double %f) { ; CHECK-LABEL: @test_fptrunc( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float> -; CHECK-NEXT: ret <2 x float> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double %f, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp9 = insertelement <4 x double> undef, double %f, i32 0 %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1 @@ -198,10 +197,9 @@ define <2 x double> @test_fpext(float %f) { ; CHECK-LABEL: @test_fpext( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> -; CHECK-NEXT: ret <2 x double> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> , float %f, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %tmp9 = insertelement <4 x float> undef, float %f, i32 0 %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1 @@ -223,8 +221,7 @@ define <4 x float> @test_select(float %f, float %g) { ; CHECK-LABEL: @test_select( -; CHECK-NEXT: [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> , float %f, i32 0 ; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RET]] ;