Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1660,7 +1660,13 @@ int DeadCost = 0; for (unsigned i = 0, e = VL.size(); i < e; ++i) { Instruction *E = cast<Instruction>(VL[i]); - if (E->hasOneUse()) + // If all users are going to be vectorized, instruction can be + // considered as dead. + // The same, if have only one user, it will be vectorized for sure. + if (E->hasOneUse() || + std::all_of(E->user_begin(), E->user_end(), [this](User *U) { + return ScalarToTreeEntry.count(U) > 0; + })) // Take credit for instruction that will become dead. DeadCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -7,11 +7,10 @@ define float @f(<2 x float> %x) { ; CHECK-LABEL: @f( -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]] -; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 @@ -24,13 +23,13 @@ define float @f_used_out_of_tree(<2 x float> %x) { ; THRESH2-LABEL: @f_used_out_of_tree( -; THRESH2-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; 
THRESH2-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; THRESH2-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]] -; THRESH2-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: store float [[ADD]], float* @a -; THRESH2-NEXT: ret float [[X0]] +; THRESH2-NEXT: ret float [[TMP1]] ; %x0 = extractelement <2 x float> %x, i32 0 %x1 = extractelement <2 x float> %x, i32 1 @@ -43,12 +42,15 @@ define float @f_used_twice_in_tree(<2 x float> %x) { ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; THRESH1-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; THRESH1-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X1]] -; THRESH1-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 +; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] ; THRESH1-NEXT: ret float [[ADD]] +; %x0 = extractelement <2 x float> %x, i32 0 %x1 = extractelement <2 x float> %x, i32 1 %x0x0 = fmul float %x0, %x1