Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1048,6 +1048,8 @@ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } bool NewUndefElts = false; + unsigned LHSIdx = -1u; + unsigned RHSIdx = -1u; for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { @@ -1059,15 +1061,50 @@ if (UndefElts4[MaskVal]) { NewUndefElts = true; UndefElts.setBit(i); - } + } else + LHSIdx = LHSIdx == -1u ? MaskVal : LHSVWidth; } else { if (UndefElts3[MaskVal - LHSVWidth]) { NewUndefElts = true; UndefElts.setBit(i); - } + } else + RHSIdx = RHSIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth; } } + // Try to transform shuffle with constant vector and single element from + // this constant vector to single insertelement instruction. + // shufflevector V, C, -> + // insertelement V, C[ci], ci-n + if (LHSVWidth == Shuffle->getType()->getNumElements()) { + Value *Op = nullptr; + Constant *Value = nullptr; + unsigned Idx = -1u; + + // Find constant vector with the single element in shuffle (LHS or RHS). + if (LHSIdx < LHSVWidth) { + if (auto *CV = dyn_cast(Shuffle->getOperand(0))) { + Op = Shuffle->getOperand(1); + Value = CV->getOperand(LHSIdx); + Idx = LHSIdx; + } + } + if (RHSIdx < LHSVWidth) { + if (auto *CV = dyn_cast(Shuffle->getOperand(1))) { + Op = Shuffle->getOperand(0); + Value = CV->getOperand(RHSIdx); + Idx = RHSIdx; + } + } + // Found constant vector with single element - convert to insertelement. + if (Op && Value) { + Instruction *New = InsertElementInst::Create( + Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx), + Shuffle->getName()); + InsertNewInstWith(New, *Shuffle); + return New; + } + } if (NewUndefElts) { // Add additional discovered undefs. 
SmallVector Elts; Index: test/Transforms/InstCombine/insert-const-shuf.ll =================================================================== --- test/Transforms/InstCombine/insert-const-shuf.ll +++ test/Transforms/InstCombine/insert-const-shuf.ll @@ -17,7 +17,9 @@ define <4 x float> @twoInserts(<4 x float> %x) { ; CHECK-LABEL: @twoInserts( -; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF1:%.*]] = insertelement <4 x float> %x, float 0.000000e+00, i32 1 +; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[SHUF1]], float 4.200000e+01, i32 2 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 1.100000e+01, i32 3 ; CHECK-NEXT: ret <4 x float> [[INS2]] ; %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> Index: test/Transforms/InstCombine/x86-insertps.ll =================================================================== --- test/Transforms/InstCombine/x86-insertps.ll +++ test/Transforms/InstCombine/x86-insertps.ll @@ -74,7 +74,7 @@ define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: @insertps_0xc1( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0 ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)