Index: include/llvm/Transforms/Vectorize/SLPVectorizer.h =================================================================== --- include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -96,11 +96,13 @@ /// \brief Try to vectorize a list of operands. /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. + /// scheduling and cost estimation when NeedExtraction + /// is false. /// \returns true if a value was vectorized. bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R, ArrayRef<Value *> BuildVector = None, - bool AllowReorder = false); + bool AllowReorder = false, + bool NeedExtraction = false); /// \brief Try to vectorize a chain that may start at the operands of \p I. bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R); Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4421,7 +4421,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, ArrayRef<Value *> BuildVector, - bool AllowReorder) { + bool AllowReorder, + bool NeedExtraction) { if (VL.size() < 2) return false; @@ -4515,11 +4516,12 @@ << "\n"); ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); + ArrayRef<Value *> EmptyArray; ArrayRef<Value *> BuildVectorSlice; if (!BuildVector.empty()) BuildVectorSlice = BuildVector.slice(I, OpsWidth); - R.buildTree(Ops, BuildVectorSlice); + R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice); // TODO: check if we can allow reordering for more cases.
if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to @@ -5709,7 +5711,9 @@ return false; DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); - return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false); + // An aggregate value is unlikely to be processed in a vector register, so + // the scalars must be extracted into scalar registers: set NeedExtraction. + return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true); } bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, Index: test/Transforms/SLPVectorizer/PowerPC/aggregate.ll =================================================================== --- test/Transforms/SLPVectorizer/PowerPC/aggregate.ll +++ test/Transforms/SLPVectorizer/PowerPC/aggregate.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s + +%struct.S = type { i8*, i8* } + +@kS0 = common global %struct.S zeroinitializer, align 8 + +define { i64, i64 } @getS() { +entry: + %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8 + %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8 + %2 = insertvalue { i64, i64 } undef, i64 %0, 0 + %3 = insertvalue { i64, i64 } %2, i64 %1, 1 + ret { i64, i64 } %3 +} + +; CHECK: load i64 +; CHECK-NOT: load <2 x i64> +; CHECK-NOT: extractelement + Index: test/Transforms/SLPVectorizer/X86/aggregate.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/aggregate.ll +++ test/Transforms/SLPVectorizer/X86/aggregate.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -slp-vectorizer < %s | FileCheck %s + +%struct.S = type { i8*, i8* } + +@kS0 = common global %struct.S zeroinitializer, align 8 + +define { i64, i64 } @getS() { +entry: + %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align
8 + %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8 + %2 = insertvalue { i64, i64 } undef, i64 %0, 0 + %3 = insertvalue { i64, i64 } %2, i64 %1, 1 + ret { i64, i64 } %3 +} + +; CHECK: load i64 +; CHECK-NOT: load <2 x i64> +; CHECK-NOT: extractelement +