diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4703,7 +4703,8 @@ auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool { if (isa(I) && I->getOperand(0) == Ptr) return false; - return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF); + return getLoadStorePointerOperand(I) == Ptr && + (isUniformDecision(I, VF) || Legal->isUniform(Ptr)); }; // Holds a list of values which are known to have at least one uniform use. @@ -4749,10 +4750,8 @@ if (isUniformMemOpUse(&I)) addToWorklistIfAllowed(&I); - if (isVectorizedMemAccessUse(&I, Ptr)) { - assert(isUniformDecision(&I, VF) && "consistency check"); + if (isVectorizedMemAccessUse(&I, Ptr)) HasUniformUse.insert(Ptr); - } } // Add to the worklist any operands which have *only* uniform (e.g. lane 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -3,9 +3,6 @@ target triple = "aarch64-unknown-linux-gnu" -; REQUIRES: asserts -; XFAIL: * - ; Test cases for PR60831. define void @test_invar_gep(ptr %dst) #0 { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -166,19 +166,16 @@ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[STORE_DEST:%.*]], i64 0 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, ptr [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP7]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[DOTSPLAT2]], i32 0 -; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP17]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -50,13 +50,11 @@ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP6]], align 2, !alias.scope !0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[L_2]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP7]], align 2, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 -; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]