Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -645,6 +645,36 @@ return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask); } +/// If we have an insertelement instruction feeding into another insertelement +/// and the 2nd is inserting a constant into the vector, canonicalize that +/// constant insertion before the insertion of a variable: +/// +/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> +/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 +/// +/// This has the potential of eliminating the 2nd insertelement instruction +/// via constant folding of the scalar constant into a vector constant. +static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, + InstCombiner::BuilderTy &Builder) { + auto *InsElt1 = dyn_cast(InsElt2.getOperand(0)); + if (!InsElt1 || !InsElt1->hasOneUse()) + return nullptr; + + Value *X, *Y; + Constant *ScalarC; + ConstantInt *IdxC1, *IdxC2; + if (match(InsElt1->getOperand(0), m_Value(X)) && + match(InsElt1->getOperand(1), m_Value(Y)) && !isa(Y) && + match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) && + match(InsElt2.getOperand(1), m_Constant(ScalarC)) && + match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) { + Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2); + return InsertElementInst::Create(NewInsElt1, Y, IdxC1); + } + + return nullptr; +} + /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex /// --> shufflevector X, CVec', Mask' static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { @@ -806,6 +836,9 @@ if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) return Shuf; + if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder)) + return NewInsElt; + // Turn a sequence of inserts that broadcasts a scalar into a single // insert + shufflevector. if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE)) Index: llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll +++ llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll @@ -81,9 +81,8 @@ define <2 x float> @test6(float %A){ ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 4.200000e+01, i32 1 -; CHECK-NEXT: ret <2 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> , float %A, i32 0 +; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %tmp23 = bitcast float %A to i32 %tmp24 = zext i32 %tmp23 to i64 Index: llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -86,11 +86,8 @@ define <8 x i16> @pr26015(<4 x i16> %t0) { ; CHECK-LABEL: @pr26015( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> %t0, i32 2 -; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> , i16 [[TMP2]], i32 3 -; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6 -; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; %t1 = extractelement <4 x i16> %t0, i32 2 @@ -110,8 +107,7 @@ ; CHECK-NEXT: br i1 %b, label %if, label %end ; CHECK: if: ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: [[T2:%.*]] = insertelement <8 x i16> , i16 [[T1]], i32 3 -; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6 +; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> , i16 [[T1]], i32 3 ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; CHECK: end: Index: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -182,10 +182,9 @@ define <2 x float> @test_fptrunc(double %f) { ; CHECK-LABEL: @test_fptrunc( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float> -; CHECK-NEXT: ret <2 x float> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double %f, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp9 = insertelement <4 x double> undef, double %f, i32 0 %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1 @@ -198,10 +197,9 @@ define <2 x double> @test_fpext(float %f) { ; CHECK-LABEL: @test_fpext( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> -; CHECK-NEXT: ret <2 x double> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> , float %f, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %tmp9 = insertelement <4 x float> undef, float %f, i32 0 %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1 @@ -223,8 +221,7 @@ define <4 x float> @test_select(float %f, float %g) { ; CHECK-LABEL: @test_select( -; CHECK-NEXT: [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> , float %f, i32 0 ; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RET]] ; Index: llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll +++ llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll @@ -12,10 +12,12 @@ ret <4 x float> %ins2 } +; Insert of a constant is canonicalized ahead of insert of a variable. + define <4 x float> @bar(<4 x float> %x, float %a) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> %x, float %a, i32 1 -; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 1 ; CHECK-NEXT: ret <4 x float> [[INS2]] ; %ins1 = insertelement<4 x float> %x, float %a, i32 1