diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1191,7 +1191,29 @@ // Because accesses (2) and (3) are dependent, we can group (2) with (1) // but not with (4). If we did, the dependent access (3) would be within // the boundaries of the (2, 4) group. - if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { + // + // + // If GroupB is a load group, we have to compare the AI against all + // members of GroupB because if any load within GroupB has a dependency + // against AI, we need to mark GroupB as complete and also release the + // storeGroup (if A belongs to one). The former prevents incorrect + // hoisting of load B above store A while the latter prevents incorrect + // sinking of store A below load B. + Instruction *DependentInst = nullptr; + if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) + DependentInst = B; + else if (GroupB && isa<LoadInst>(B)) { + for (uint32_t Index = 0; Index < GroupB->getFactor(); ++Index) { + Instruction *MemberOfGroupB = GroupB->getMember(Index); + if (MemberOfGroupB && + !canReorderMemAccessesForInterleavedGroups( + &*AI, &*AccessStrideInfo.find(MemberOfGroupB))) { + DependentInst = MemberOfGroupB; + break; + } + } + } + if (DependentInst) { // If a dependence exists and A is already in a group, we know that A // must be a store since A precedes B and WAR dependences are allowed. // Thus, A would be sunk below B. 
We release A's group to prevent this @@ -1201,15 +1223,16 @@ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A); LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " - "dependence between " << *A << " and "<< *B << '\n'); + "dependence between " + << *A << " and " << *DependentInst << '\n'); StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } // If B is a load and part of an interleave group, no earlier loads can - // be added to B's interleave group, because this would mean the load B - // would need to be moved across store A. Mark the interleave group as - // complete. + // be added to B's interleave group, because this would mean the + // DependentInst would need to be moved across store A. Mark the + // interleave group as complete. if (GroupB && isa<LoadInst>(B)) { LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B << " as complete.\n"); diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll @@ -47,19 +47,26 @@ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 ; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[STRIDED_VEC4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP11]], align 4 -; CHECK-NEXT: 
[[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 1 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP28]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP29]], i32 0 +; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP29]], i32 1 +; CHECK-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP29]], i32 2 +; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP29]], i32 3 +; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -121,7 +128,6 @@ ; compare against the obstructing stores 
(%l2 versus the store) there is no ; dependency. However, the other load in %l2's interleave group (%l3) does ; obstruct with the store. -; FIXME: The test case is currently mis-compiled. define void @pr63602_2(ptr %arr) { ; CHECK-LABEL: define void @pr63602_2 ; CHECK-SAME: (ptr [[ARR:%.*]]) { @@ -142,38 +148,48 @@ ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 ; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1 ; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2 ; 
CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3 ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 0 -; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP18]], i32 1 -; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i32 2 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP28]], align 4 +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP27]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP30:%.*]] = 
extractelement <4 x i32> [[TMP29]], i32 0 +; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP29]], i32 1 +; CHECK-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP29]], i32 2 +; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP29]], i32 3 +; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 17, 16 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 49, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 52, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY]] ] @@ -195,7 +211,7 @@ ; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_IV_2]], align 4 ; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 3 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[IV_2]], 50 -; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -3,15 +3,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" target triple = "x86_64-apple-macos" -; This is currently miscompiled. -; %l2 load and the preceeding store has a dependency. However, we currently sink +; %l2 load and the preceeding store has a dependency. We should not sink ; that store into the last store (by creating an interleaved store group). This -; means the loaded %l2 has incorrect value. -; We do not release this store group correctly because the next interleave group -; chosen compares only the memory access of last load in program (%l3) against the dependent store location -; (%gep.iv.1.plus.2) and they are different, thereby incorrectly assuming no -; dependency. We need to compare against all loads in that interleaved group -; (%l2 is part of it). +; means the loaded %l2 will have incorrect value. define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-LABEL: define void @avoid_sinking_store_across_load ; CHECK-SAME: (ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { @@ -28,26 +22,31 @@ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x 
i64> [[VEC_IND2]] ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC5]], +; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> ) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_VEC6:%.*]] = load <12 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <12 x i32> [[WIDE_VEC6]], <12 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC5]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP13]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; 
CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 17, 16 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 49, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 52, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY]] ] @@ -70,7 +69,7 @@ ; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_IV_2]], align 4 ; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 3 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[IV_2]], 50 -; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-waw-dependency.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -debug-only=vectorutils -disable-output -enable-interleaved-mem-accesses=true 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -105,3 +106,5 @@ exit: ; preds = %latch ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}}