Index: llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -95,14 +95,9 @@
   bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
 
   /// \brief Try to vectorize a list of operands.
-  /// \@param BuildVector A list of users to ignore for the purpose of
-  ///                     scheduling and cost estimation when NeedExtraction
-  ///                     is false.
   /// \returns true if a value was vectorized.
   bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
-                          ArrayRef<Value *> BuildVector = None,
-                          bool AllowReorder = false,
-                          bool NeedExtraction = false);
+                          bool AllowReorder = false);
 
   /// \brief Try to vectorize a chain that may start at the operands of \p I.
   bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4430,13 +4430,11 @@
   if (!A || !B)
     return false;
   Value *VL[] = { A, B };
-  return tryToVectorizeList(VL, R, None, true);
+  return tryToVectorizeList(VL, R, true);
 }
 
 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
-                                           ArrayRef<Value *> BuildVector,
-                                           bool AllowReorder,
-                                           bool NeedExtraction) {
+                                           bool AllowReorder) {
   if (VL.size() < 2)
     return false;
 
@@ -4530,12 +4528,7 @@
                    << "\n");
       ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
 
-      ArrayRef<Value *> EmptyArray;
-      ArrayRef<Value *> BuildVectorSlice;
-      if (!BuildVector.empty())
-        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
-      R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
+      R.buildTree(Ops);
       // TODO: check if we can allow reordering for more cases.
       if (AllowReorder && R.shouldReorder()) {
         // Conceptually, there is nothing actually preventing us from trying to
@@ -4543,7 +4536,6 @@
         // reductions. However, at this point, we only expect to get here when
         // there are exactly two operations.
         assert(Ops.size() == 2);
-        assert(BuildVectorSlice.empty());
         Value *ReorderedOps[] = {Ops[1], Ops[0]};
         R.buildTree(ReorderedOps, None);
       }
@@ -4563,31 +4555,7 @@
                                          << " and with tree size "
                                          << ore::NV("TreeSize", R.getTreeSize()));
 
-      Value *VectorizedRoot = R.vectorizeTree();
-
-      // Reconstruct the build vector by extracting the vectorized root. This
-      // way we handle the case where some elements of the vector are
-      // undefined.
-      //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-      if (!BuildVectorSlice.empty()) {
-        // The insert point is the last build vector instruction. The
-        // vectorized root will precede it. This guarantees that we get an
-        // instruction. The vectorized tree could have been constant folded.
-        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-        unsigned VecIdx = 0;
-        for (auto &V : BuildVectorSlice) {
-          IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                      ++BasicBlock::iterator(InsertAfter));
-          Instruction *I = cast<Instruction>(V);
-          assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-          Instruction *Extract =
-              cast<Instruction>(Builder.CreateExtractElement(
-                  VectorizedRoot, Builder.getInt32(VecIdx++)));
-          I->setOperand(1, Extract);
-          I->moveAfter(Extract);
-          InsertAfter = I;
-        }
-      }
+      R.vectorizeTree();
 
       // Move to the next bundle.
       I += VF - 1;
       NextInst = I + 1;
@@ -5508,11 +5476,9 @@
 ///
 /// Returns true if it matches
 static bool findBuildVector(InsertElementInst *LastInsertElem,
-                            SmallVectorImpl<Value *> &BuildVector,
                             SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V = nullptr;
   do {
-    BuildVector.push_back(LastInsertElem);
     BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
     V = LastInsertElem->getOperand(0);
     if (isa<UndefValue>(V))
@@ -5521,7 +5487,6 @@
     if (!LastInsertElem || !LastInsertElem->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5530,11 +5495,9 @@
 ///
 /// \return true if it matches.
 static bool findBuildAggregate(InsertValueInst *IV,
-                               SmallVectorImpl<Value *> &BuildVector,
                                SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V;
   do {
-    BuildVector.push_back(IV);
     BuildVectorOpds.push_back(IV->getInsertedValueOperand());
     V = IV->getAggregateOperand();
     if (isa<UndefValue>(V))
@@ -5543,7 +5506,6 @@
     if (!IV || !IV->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5719,27 +5681,25 @@
   if (!R.canMapToVector(IVI->getType(), DL))
     return false;
 
-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
+  if (!findBuildAggregate(IVI, BuildVectorOpds))
     return false;
 
   DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
   // Aggregate value is unlikely to be processed in vector register, we need to
   // extract scalars into scalar registers, so NeedExtraction is set true.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }
 
 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
                                                    BasicBlock *BB, BoUpSLP &R) {
-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
+  if (!findBuildVector(IEI, BuildVectorOpds))
    return false;
 
   // Vectorize starting with the build vector operands ignoring the BuildVector
   // instructions for the purpose of scheduling and user extraction.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }
 
 bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -5817,8 +5777,8 @@
       // is done when there are exactly two elements since tryToVectorizeList
      // asserts that there are only two values when AllowReorder is true.
      bool AllowReorder = NumElts == 2;
-      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
-                                            None, AllowReorder)) {
+      if (NumElts > 1 &&
+          tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
        // Success start over because instructions might have been changed.
        HaveVectorizedPhiNodes = true;
        Changed = true;
Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35777.ll
===================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35777.ll
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35777.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
+
+@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
+
+define { i64, i64 } @patatino(double %arg) {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[TMP17]]
+;
+bb:
+  %tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
+  %tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
+  %tmp2 = fmul double %tmp1, %arg
+  %tmp3 = fadd double %tmp, %tmp2
+  %tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
+  %tmp5 = fadd double %tmp4, %tmp3
+  %tmp6 = fptosi double %tmp5 to i32
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
+  %tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
+  %tmp10 = fmul double %tmp9, %arg
+  %tmp11 = fadd double %tmp8, %tmp10
+  %tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
+  %tmp13 = fadd double %tmp12, %tmp11
+  %tmp14 = fptosi double %tmp13 to i32
+  %tmp15 = sext i32 %tmp14 to i64
+  %tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
+  %tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
+  ret { i64, i64 } %tmp17
+}
Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
===================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -387,25 +387,25 @@
 ; to do this backwards this backwards
 define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
 ; CHECK-LABEL: @reconstruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
-; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 1
-; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2
-; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 3
-; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
+; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
+; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
+; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
 ; CHECK-NEXT:    ret <4 x i32> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @reconstruct(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
-; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 1
-; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2
-; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 3
-; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
+; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
+; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
+; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
 ; ZEROTHRESH-NEXT:    ret <4 x i32> [[RD]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/insertvalue.ll
===================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/insertvalue.ll
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -19,8 +19,8 @@
 ; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
 ; CHECK-NEXT:    store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -72,12 +72,12 @@
 ; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
 ; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
 ; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
 ; CHECK-NEXT:    store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -134,12 +134,12 @@
 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
 ; CHECK-NEXT:    store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -178,12 +178,12 @@
 ; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
 ; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -273,12 +273,12 @@
 ; CHECK-NEXT:    [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
 ; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
 ; CHECK-NEXT:    store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll
===================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT:    br label [[BB283:%.*]]
 ; CHECK:       bb283:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP15:%.*]], [[EXIT]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
 ; CHECK-NEXT:    br label [[BB284:%.*]]
 ; CHECK:       bb284:
 ; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
@@ -35,10 +35,10 @@
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
 ; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> undef, i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> undef, i32 1
-; CHECK-NEXT:    [[TMP15]] = insertelement <2 x float> [[TMP13]], float [[TMP14]], i32 1
+; CHECK-NEXT:    [[TMP317:%.*]] = fptrunc double undef to float
+; CHECK-NEXT:    [[TMP319:%.*]] = fptrunc double undef to float
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
+; CHECK-NEXT:    [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
 ; CHECK-NEXT:    br label [[BB283]]
 ;
 bb279:
@@ -95,10 +95,8 @@
 define <4 x double> @constant_folding() {
 ; CHECK-LABEL: @constant_folding(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
-; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP1]], i32 0
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
 ; CHECK-NEXT:    ret <4 x double> [[I2]]
 ;
 entry: