// Patch overview (commentary only; placed before the first `diff --git` header).
//
// Files touched:
//   * llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (one hunk, -4766,9 +4766,10)
//   * llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll (new, 57 lines)
//
// SLPVectorizer.cpp hunk -- inside the loop over VL that checks whether any
// value of the bundle is already in the tree:
//   * Before: a value that is not an Instruction (`!I`) was always skipped
//     with `continue`.
//   * After: such a value is skipped only when UserTreeIdx.UserTE is null or
//     its State is not TreeEntry::ScatterVectorize; otherwise it falls through
//     to the tree lookup.
//   * The lookup is now getTreeEntry(V) instead of getTreeEntry(I); I is null
//     for the non-instruction values that can now reach this point.
//
// New test -- @test(i1 %c, ptr %arg):
//   * Both the `if` and `else` blocks load i64 from %arg at byte offsets
//     0, 8, 24 and 32; `join` merges the loads with four phi nodes.
//   * RUN line: opt -S -slp-vectorizer, -mtriple=x86_64-unknown-linux-gnu,
//     -mcpu=skylake, -slp-threshold=-12, piped to FileCheck.
//   * The CHECK lines expect an @llvm.masked.gather.v4i64.v4p0 call in each
//     branch and a single <4 x i64> phi in `join`. Per the NOTE line they were
//     autogenerated by utils/update_test_checks.py.
//
// NOTE(review): this copy of the patch looks damaged by extraction -- line
// breaks are collapsed and several angle-bracketed spans appear to be missing
// (`dyn_cast(V)` has no template argument; `<4 x i64>`, `<2 x i64>`,
// `<4 x i32>` and `<4 x i1>` are followed by no constant operand). Presumably
// the upstream commit has them; confirm against it before applying.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4766,9 +4766,10 @@ // Check that none of the instructions in the bundle are already in the tree. for (Value *V : VL) { auto *I = dyn_cast(V); - if (!I) + if (!I && (!UserTreeIdx.UserTE || + UserTreeIdx.UserTE->State != TreeEntry::ScatterVectorize)) continue; - if (getTreeEntry(I)) { + if (getTreeEntry(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); if (TryToFindDuplicates(S)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-12 | FileCheck %s + +define void @test(i1 %c, ptr %arg) { +; CHECK-LABEL: @test( +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> undef) +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARG]], i32 1 +; CHECK-NEXT: 
[[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> , <4 x i64> undef) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP11]], [[ELSE]] ] +; CHECK-NEXT: ret void +; + br i1 %c, label %if, label %else + +if: + %i2.0 = load i64, ptr %arg, align 8 + %arg2.1 = getelementptr inbounds i8, ptr %arg, i64 8 + %i2.1 = load i64, ptr %arg2.1, align 8 + %arg2.2 = getelementptr inbounds i8, ptr %arg, i64 24 + %i2.2 = load i64, ptr %arg2.2, align 8 + %arg2.3 = getelementptr inbounds i8, ptr %arg, i64 32 + %i2.3 = load i64, ptr %arg2.3, align 8 + br label %join + +else: + %i.0 = load i64, ptr %arg, align 8 + %arg.1 = getelementptr inbounds i8, ptr %arg, i64 8 + %i.1 = load i64, ptr %arg.1, align 8 + %arg.2 = getelementptr inbounds i8, ptr %arg, i64 24 + %i.2 = load i64, ptr %arg.2, align 8 + %arg.3 = getelementptr inbounds i8, ptr %arg, i64 32 + %i.3 = load i64, ptr %arg.3, align 8 + br label %join + +join: + %phi.3 = phi i64 [ %i2.3, %if ], [ %i.3, %else ] + %phi.2 = phi i64 [ %i2.2, %if ], [ %i.2, %else ] + %phi.1 = phi i64 [ %i2.1, %if ], [ %i.1, %else ] + %phi.0 = phi i64 [ %i2.0, %if ], [ %i.0, %else ] + ret void +} +