diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6076,7 +6076,7 @@
     // If a vector register can't hold 1 element, we are done.
     unsigned MaxVecRegSize = R.getMaxVecRegSize();
-    unsigned EltSize = R.getVectorElementSize(Stores[0]);
+    unsigned EltSize = R.getVectorElementSize(Operands[0]);
     if (MaxVecRegSize % EltSize != 0)
       continue;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
@@ -23,28 +23,21 @@
 ; CHECK-NEXT:    [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
 ; CHECK-NEXT:    [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
 ; CHECK-NEXT:    [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
-; CHECK-NEXT:    [[T19:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT:    [[T23:%.*]] = load i32, i32* [[T18]], align 4
-; CHECK-NEXT:    [[T27:%.*]] = load i32, i32* [[T22]], align 4
-; CHECK-NEXT:    [[T30:%.*]] = load i32, i32* [[T26]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT:    [[T20:%.*]] = add nsw i32 [[T19]], 4
-; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], 4
-; CHECK-NEXT:    [[T28:%.*]] = add nsw i32 [[T27]], 6
-; CHECK-NEXT:    [[T31:%.*]] = add nsw i32 [[T30]], 7
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]],
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]],
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
-; CHECK-NEXT:    store i32 [[T20]], i32* [[T21]], align 4
-; CHECK-NEXT:    store i32 [[T24]], i32* [[T25]], align 4
-; CHECK-NEXT:    store i32 [[T28]], i32* [[T29]], align 4
-; CHECK-NEXT:    store i32 [[T31]], i32* [[T32]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]],
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]],
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]],
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %t0 = bitcast i8* %v0 to i32*