// Patch summary (descriptive preamble placed before the first `diff --git` header).
//
// Files touched:
//   * llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (three hunks).
//   * llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll
//     (new file; the old side of the diff is /dev/null).
//
// Hunk @@ -4701: introduces the local flag IsScatterVectorizeUserTE, which is
//   true when UserTreeIdx.UserTE is non-null and its State equals
//   TreeEntry::ScatterVectorize. AreAllSameInsts now reads that flag instead
//   of spelling the same two-part condition inline.
//
// Hunk @@ -4765: changes the "already in the tree" loop over VL. The old code
//   skipped every V for which dyn_cast returned null and then called
//   getTreeEntry(I) on the cast result. The new code skips a value failing
//   the isa check only when IsScatterVectorizeUserTE is false, and calls
//   getTreeEntry(V) on the value itself, so with a ScatterVectorize user
//   entry every element of VL is looked up.
//
// Hunk @@ -5218: deletes the later local IsScatterUser, whose initializer was
//   the same expression as the new flag, and uses IsScatterVectorizeUserTE in
//   the GEP index check in its place.
//
// New test: function @test branches on %c into `if` and `else`; each side
//   loads four i64 values from %arg at byte offsets 0, 8, 24 and 32, and
//   `join` merges them through four phi nodes. The RUN line invokes
//   opt -slp-vectorizer with -mcpu=skylake and -slp-threshold=-12, and the
//   CHECK lines expect an @llvm.masked.gather.v4i64.v4p0 call on each side.
//   The file name suggests the scenario is a pointer reused by a
//   scatter-vectorized bundle - presumably the motivating case; confirm
//   against the commit message.
//
// NOTE(review): this copy looks extraction-damaged - the patch is collapsed
//   onto a few physical lines and angle-bracketed text seems to be missing
//   (e.g. `dyn_cast(V)`, `isa(V)`, `SmallVector SortedIndices`, and vector
//   constants in the CHECK lines). Verify against the upstream patch before
//   attempting to apply it.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4701,10 +4701,12 @@ }; SmallVector SortedIndices; BasicBlock *BB = nullptr; + bool IsScatterVectorizeUserTE = + UserTreeIdx.UserTE && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; bool AreAllSameInsts = (S.getOpcode() && allSameBlock(VL)) || - (S.OpValue->getType()->isPointerTy() && UserTreeIdx.UserTE && - UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize && + (S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE && VL.size() > 2 && all_of(VL, [&BB](Value *V) { @@ -4765,10 +4767,9 @@ // Check that none of the instructions in the bundle are already in the tree. for (Value *V : VL) { - auto *I = dyn_cast(V); - if (!I) + if (!IsScatterVectorizeUserTE && !isa(V)) continue; - if (getTreeEntry(I)) { + if (getTreeEntry(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); if (TryToFindDuplicates(S)) @@ -5218,9 +5219,6 @@ } } - bool IsScatterUser = - UserTreeIdx.UserTE && - UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; // We don't combine GEPs with non-constant indexes. 
Type *Ty1 = VL0->getOperand(1)->getType(); for (Value *V : VL) { @@ -5228,9 +5226,9 @@ if (!I) continue; auto *Op = I->getOperand(1); - if ((!IsScatterUser && !isa(Op)) || + if ((!IsScatterVectorizeUserTE && !isa(Op)) || (Op->getType() != Ty1 && - ((IsScatterUser && !isa(Op)) || + ((IsScatterVectorizeUserTE && !isa(Op)) || Op->getType()->getScalarSizeInBits() > DL->getIndexSizeInBits( V->getType()->getPointerAddressSpace())))) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-12 | FileCheck %s + +define void @test(i1 %c, ptr %arg) { +; CHECK-LABEL: @test( +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> undef) +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARG]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x 
ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> , <4 x i64> undef) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP11]], [[ELSE]] ] +; CHECK-NEXT: ret void +; + br i1 %c, label %if, label %else + +if: + %i2.0 = load i64, ptr %arg, align 8 + %arg2.1 = getelementptr inbounds i8, ptr %arg, i64 8 + %i2.1 = load i64, ptr %arg2.1, align 8 + %arg2.2 = getelementptr inbounds i8, ptr %arg, i64 24 + %i2.2 = load i64, ptr %arg2.2, align 8 + %arg2.3 = getelementptr inbounds i8, ptr %arg, i64 32 + %i2.3 = load i64, ptr %arg2.3, align 8 + br label %join + +else: + %i.0 = load i64, ptr %arg, align 8 + %arg.1 = getelementptr inbounds i8, ptr %arg, i64 8 + %i.1 = load i64, ptr %arg.1, align 8 + %arg.2 = getelementptr inbounds i8, ptr %arg, i64 24 + %i.2 = load i64, ptr %arg.2, align 8 + %arg.3 = getelementptr inbounds i8, ptr %arg, i64 32 + %i.3 = load i64, ptr %arg.3, align 8 + br label %join + +join: + %phi.3 = phi i64 [ %i2.3, %if ], [ %i.3, %else ] + %phi.2 = phi i64 [ %i2.2, %if ], [ %i.2, %else ] + %phi.1 = phi i64 [ %i2.1, %if ], [ %i.1, %else ] + %phi.0 = phi i64 [ %i2.0, %if ], [ %i.0, %else ] + ret void +} +