diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3094,6 +3094,16 @@
     case Instruction::Store: {
       // Check if the stores are consecutive or if we need to swizzle them.
       llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
+      // Avoid types that are padded when being allocated as scalars, while
+      // being packed together in a vector (such as i1).
+      if (DL->getTypeSizeInBits(ScalarTy) !=
+          DL->getTypeAllocSizeInBits(ScalarTy)) {
+        BS.cancelScheduling(VL, VL0);
+        newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+                     ReuseShuffleIndicies);
+        LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
+        return;
+      }
       // Make sure all stores in the bundle are simple - we can't vectorize
       // atomic or volatile stores.
       SmallVector<Value *, 4> PointerOps(VL.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -15,8 +15,8 @@
 ; CHECK-NEXT:    [[A_AND:%.*]] = and i64 [[A_CAST]], 42
 ; CHECK-NEXT:    [[B_AND:%.*]] = and i64 [[B_CAST]], 42
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i32 1
-; CHECK-NEXT:    store i64 [[A_AND]], i64* [[PTR]]
-; CHECK-NEXT:    store i64 [[B_AND]], i64* [[GEP]]
+; CHECK-NEXT:    store i64 [[A_AND]], i64* [[PTR]], align 8
+; CHECK-NEXT:    store i64 [[B_AND]], i64* [[GEP]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -110,3 +110,30 @@
 }
 
 declare void @f(i64, i64)
+
+define void @test4(i32 %a, i28* %ptr) {
+; Check that we do not vectorize types that are padded to a bigger type.
+;
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i28
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i28, i28* [[PTR:%.*]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i28, i28* [[PTR]], i32 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i28, i28* [[PTR]], i32 3
+; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[PTR]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP1]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP2]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %trunc = trunc i32 %a to i28
+  %gep1 = getelementptr i28, i28* %ptr, i32 1
+  %gep2 = getelementptr i28, i28* %ptr, i32 2
+  %gep3 = getelementptr i28, i28* %ptr, i32 3
+  store i28 %trunc, i28* %ptr
+  store i28 %trunc, i28* %gep1
+  store i28 %trunc, i28* %gep2
+  store i28 %trunc, i28* %gep3
+  ret void
+}
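
Note on the guard above: DataLayout::getTypeSizeInBits returns the bit width
of the value itself, while DataLayout::getTypeAllocSizeInBits returns the
alignment-padded size the type occupies in memory. When the two differ (i1,
i28, ...), N scalar stores and one <N x iK> vector store would write a
different number of bytes, so the bundle is gathered instead of vectorized.
Below is a minimal standalone sketch (not part of the patch) that prints the
two sizes for a few integer widths; the driver program and the x86-64 layout
string are illustrative assumptions, not code from this change:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    // Assumed layout string; matches a typical x86-64 target.
    DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
    for (unsigned Bits : {1u, 8u, 28u, 32u}) {
      Type *Ty = Type::getIntNTy(Ctx, Bits);
      uint64_t Size = DL.getTypeSizeInBits(Ty);       // width of the value
      uint64_t Alloc = DL.getTypeAllocSizeInBits(Ty); // padded in-memory size
      outs() << "i" << Bits << ": size=" << Size << " alloc=" << Alloc
             << (Size != Alloc ? "  -> store bundle is gathered\n" : "\n");
    }
    // i1 (1 vs 8) and i28 (28 vs 32) differ, so the new check rejects them;
    // i8 and i32 are packed (size == alloc size) and remain vectorizable.
  }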