Use VPUser::onlyFirstLaneUsed to decide whether a sink candidate may be
duplicated in sinkScalarOperands.

Previously a user outside of SinkTo was only accepted when it was a widened
memory recipe using the candidate as its address operand. The lambda
CanSinkWithUser now asks each out-of-block user whether it only uses the first
lane of the candidate, and records that answer in NeedsDuplicating. The
predicated-store tests are updated because the address getelementptr is now
also emitted inside the pred.store.if block.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
The dyn_cast template arguments and the leading indentation / FileCheck column
alignment were restored by hand; confirm them against the target tree before
applying.

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -138,8 +138,7 @@
     // All recipe users of the sink candidate must be in the same block SinkTo
     // or all users outside of SinkTo must be uniform-after-vectorization (
     // i.e., only first lane is used) . In the latter case, we need to duplicate
-    // SinkCandidate. At the moment, we identify such UAV's by looking for the
-    // address operands of widened memory recipes.
+    // SinkCandidate.
     auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                             SinkCandidate](VPUser *U) {
       auto *UI = dyn_cast<VPRecipeBase>(U);
@@ -147,12 +146,8 @@
         return false;
       if (UI->getParent() == SinkTo)
         return true;
-      auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI);
-      if (WidenI && WidenI->getAddr() == SinkCandidate) {
-        NeedsDuplicating = true;
-        return true;
-      }
-      return false;
+      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
+      return NeedsDuplicating;
     };
     if (!all_of(SinkCandidate->users(), CanSinkWithUser))
       continue;
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -38,8 +38,9 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP2_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP2]], align 8
+; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP2_1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
@@ -136,8 +137,9 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P:%.*]], i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP9]], i64* [[TMP3]], align 8
+; CHECK-NEXT:    store i64 [[TMP9]], i64* [[PTR0]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i64 1
@@ -246,8 +248,9 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P]], i64 [[INDEX]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP9]], i64* [[TMP5]], align 8
+; CHECK-NEXT:    store i64 [[TMP9]], i64* [[PTR1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i64 1