diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -831,8 +831,15 @@ Builder.Insert(GEP); StoreInst *NSI = Builder.CreateStore(NewElement, GEP); NSI->copyMetadata(*SI); - if (SI->getAlign() < NSI->getAlign()) - NSI->setAlignment(SI->getAlign()); + Align NewAlignment = std::max(SI->getAlign(), Load->getAlign()); + if (auto *C = dyn_cast<ConstantInt>(Idx)) + NewAlignment = commonAlignment( + NewAlignment, + C->getZExtValue() * DL.getTypeStoreSize(NewElement->getType())); + else + NewAlignment = commonAlignment( + NewAlignment, DL.getTypeStoreSize(NewElement->getType())); + NSI->setAlignment(NewAlignment); replaceValue(I, *NSI); // Need erasing the store manually. I.eraseFromParent(); diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll --- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll +++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll @@ -2,6 +2,8 @@ ; RUN: opt -S -vector-combine -data-layout=e < %s | FileCheck %s ; RUN: opt -S -vector-combine -data-layout=E < %s | FileCheck %s +; These vector load/store without align will have implicit alignment of vector +; size, which may affect align of scalarized store. 
define void @insert_store(<16 x i8>* %q, i8 zeroext %s) { ; CHECK-LABEL: @insert_store( ; CHECK-NEXT: entry: @@ -20,16 +22,30 @@ ; CHECK-LABEL: @insert_store_i16_align1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3 -; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 1 +; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 2 ; CHECK-NEXT: ret void ; entry: - %0 = load <8 x i16>, <8 x i16>* %q + %0 = load <8 x i16>, <8 x i16>* %q, align 16 %vecins = insertelement <8 x i16> %0, i16 %s, i32 3 store <8 x i16> %vecins, <8 x i16>* %q, align 1 ret void } +define void @insert_store_zero_offset(<8 x i16>* %q, i16 zeroext %s) { +; CHECK-LABEL: @insert_store_zero_offset( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 0 +; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 128 +; CHECK-NEXT: ret void +; +entry: + %0 = load <8 x i16>, <8 x i16>* %q, align 128 + %vecins = insertelement <8 x i16> %0, i16 %s, i32 0 + store <8 x i16> %vecins, <8 x i16>* %q, align 128 + ret void +} + ; To verify case when index is out of bounds define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) { ; CHECK-LABEL: @insert_store_outofbounds( @@ -125,6 +141,24 @@ ret void } +define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) { +; CHECK-LABEL: @insert_store_nonconst_large_alignment( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]] +; CHECK-NEXT: store i32 [[S:%.*]], i32* [[TMP0]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp ult i32 %idx, 4 + call void @llvm.assume(i1 %cmp) + %i = load <4 x i32>, <4 x i32>* %q, align 128 + %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx + store <4 x i32> 
%vecins, <4 x i32>* %q, align 128 + ret void +} + define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) { ; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume( ; CHECK-NEXT: entry: