# Review notes for this patch. These lines precede the first `diff --git`
# header and are not part of any hunk.
#
# 1. llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
#    In the `else` branch shown in the hunk, the builder insert point moves
#    from the user instruction to the position immediately after `VecI` inside
#    `VecI`'s parent block. The value returned by
#    ExtractAndExtendIfNeeded(Vec) then replaces `Scalar` in `User`.
#    NOTE(review): CSEBlocks still records the user's parent block; if the
#    extract is now emitted in VecI's block, that block is presumably the one
#    that should be recorded -- confirm.
#    NOTE(review): if VecI can be a PHI node, std::next(VecI->getIterator())
#    may point inside the PHI group -- confirm that case is excluded.
#    NOTE(review): `cast(User)` looks like it lost a `<...>` template argument
#    in extraction (presumably `cast<Instruction>(User)`) -- verify against
#    the upstream file.
#
# 2. llvm/test/Transforms/SLPVectorizer/pr55796.ll (new file)
#    Test that runs `opt -passes=slp-vectorizer -S` on @pluto and pipes the
#    output to FileCheck; it requires an assertions-enabled build. The CHECK
#    lines are autogenerated by utils/update_test_checks.py. They expect the
#    two extractelement instructions ([[TMP3]], [[TMP4]]) in the entry block
#    `bb`, directly after [[SHUFFLE10]].
#    NOTE(review): several CHECK lines end in a bare vector type
#    (`<2 x i32> ,`, `<4 x i32>`, `<8 x i32> )`); the constant operands and
#    shuffle masks appear to have been stripped in extraction -- regenerate
#    with update_test_checks.py rather than hand-editing.
#
# NOTE(review): line breaks and context-line indentation below were
# reconstructed to match the hunk headers (7/8 and 101 lines); verify the
# whitespace against upstream before applying.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8363,7 +8363,8 @@
           }
         }
       } else {
-        Builder.SetInsertPoint(cast(User));
+        Builder.SetInsertPoint(VecI->getParent(),
+                               std::next(VecI->getIterator()));
         Value *NewInst = ExtractAndExtendIfNeeded(Vec);
         CSEBlocks.insert(cast(User)->getParent());
         User->replaceUsesOfWith(Scalar, NewInst);
diff --git a/llvm/test/Transforms/SLPVectorizer/pr55796.ll b/llvm/test/Transforms/SLPVectorizer/pr55796.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/pr55796.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s
+
+; REQUIRES: asserts
+define i32 @pluto() #0 {
+; CHECK-LABEL: @pluto(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* undef, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[TMP]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]]
+; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE10]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[SHUFFLE10]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: [[SHUFFLE14:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <8 x i32>
+; CHECK-NEXT: br i1 undef, label [[BB41:%.*]], label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> )
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE14]])
+; CHECK-NEXT: [[OP_RDX15:%.*]] = add i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br label [[BB20:%.*]]
+; CHECK: bb19:
+; CHECK-NEXT: br label [[BB20]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[OP_RDX15]], [[BB3]] ], [ undef, [[BB19:%.*]] ]
+; CHECK-NEXT: ret i32 undef
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> )
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE10]]
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]])
+; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[TMP9]], undef
+; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX11]], [[TMP3]]
+; CHECK-NEXT: [[OP_RDX13:%.*]] = add i32 [[TMP12]], [[OP_RDX12]]
+; CHECK-NEXT: [[TMP40:%.*]] = call i32 @bar(i32 [[TMP]], i32 [[OP_RDX13]])
+; CHECK-NEXT: ret i32 [[TMP40]]
+; CHECK: bb41:
+; CHECK-NEXT: br label [[BB22:%.*]]
+;
+bb:
+  %tmp = load i32, i32* undef, align 4
+  %tmp1 = sub nsw i32 110, %tmp
+  %tmp2 = sub nsw i32 0, %tmp
+  br i1 undef, label %bb41, label %bb3
+
+bb3: ; preds = %bb
+  %tmp4 = add i32 0, %tmp1
+  %tmp5 = add i32 %tmp4, %tmp2
+  %tmp6 = add i32 %tmp5, 90
+  %tmp7 = add i32 %tmp6, %tmp2
+  %tmp8 = add i32 %tmp7, 72
+  %tmp9 = add i32 %tmp8, %tmp2
+  %tmp10 = add i32 %tmp9, 56
+  %tmp11 = add i32 %tmp10, %tmp2
+  %tmp12 = add i32 %tmp11, 42
+  %tmp13 = add i32 %tmp12, %tmp2
+  %tmp14 = add i32 %tmp13, 30
+  %tmp15 = add i32 %tmp14, %tmp2
+  %tmp16 = add i32 %tmp15, 20
+  %tmp17 = add i32 %tmp16, %tmp2
+  %tmp18 = add i32 %tmp17, 12
+  br label %bb20
+
+bb19: ; No predecessors!
+  br label %bb20
+
+bb20: ; preds = %bb19, %bb3
+  %tmp21 = phi i32 [ %tmp18, %bb3 ], [ undef, %bb19 ]
+  ret i32 undef
+
+bb22: ; preds = %bb41
+  %tmp23 = add i32 undef, %tmp1
+  %tmp24 = add i32 %tmp23, %tmp2
+  %tmp25 = add i32 %tmp24, 90
+  %tmp26 = add i32 %tmp25, %tmp2
+  %tmp27 = add i32 %tmp26, 72
+  %tmp28 = add i32 %tmp27, %tmp2
+  %tmp29 = add i32 %tmp28, 56
+  %tmp30 = add i32 %tmp29, %tmp2
+  %tmp31 = add i32 %tmp30, 42
+  %tmp32 = add i32 %tmp31, %tmp2
+  %tmp33 = add i32 %tmp32, 30
+  %tmp34 = add i32 %tmp33, %tmp2
+  %tmp35 = add i32 %tmp34, 20
+  %tmp36 = add i32 %tmp35, %tmp2
+  %tmp37 = add i32 %tmp36, 12
+  %tmp38 = add i32 %tmp37, %tmp2
+  %tmp39 = add i32 %tmp38, 6
+  %tmp40 = call i32 @bar(i32 %tmp, i32 %tmp39)
+  ret i32 %tmp40
+
+bb41: ; preds = %bb
+  br label %bb22
+}
+
+declare i32 @bar(i32, i32)
+
+attributes #0 = { "target-features"="-avx512pf,+avx512f,+avx512bw" }