Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4946,8 +4946,14 @@
   // sign extend the extracted values below.
   auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
   if (MinBWs.count(ScalarRoot)) {
-    if (auto *I = dyn_cast<Instruction>(VectorRoot))
-      Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
+      // If the current instruction is a PHI, insert the trunc after the last
+      // PHI node of the block, since PHI nodes must stay grouped at the top.
+      if (dyn_cast<PHINode>(I))
+        Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
+      else
+        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    }
     auto BundleWidth = VectorizableTree[0]->Scalars.size();
     auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
     auto *VecTy = VectorType::get(MinTy, BundleWidth);
Index: llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -slp-vectorizer -disable-verify -S | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@d = internal unnamed_addr global i32 5, align 4
+
+define dso_local void @l() local_unnamed_addr {
+
+; This test checks that the trunc instruction is not
+; inserted between two PHI nodes.
+
+; CHECK: bb25:
+; CHECK: trunc
+; CHECK-NOT: phi
+; CHECK: bb34:
+
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb25, %bb
+  %tmp = phi i16 [ undef, %bb ], [ %tmp29, %bb25 ]
+  %tmp2 = phi i16 [ undef, %bb ], [ %tmp30, %bb25 ]
+  br i1 undef, label %bb3, label %bb11
+
+bb3:                                              ; preds = %bb1
+  %tmp4 = zext i1 undef to i32
+  %tmp5 = xor i16 %tmp2, undef
+  %tmp6 = icmp ugt i16 %tmp5, 8
+  %tmp7 = zext i1 %tmp6 to i32
+  %tmp8 = xor i16 %tmp, undef
+  %tmp9 = icmp ugt i16 %tmp8, 8
+  %tmp10 = zext i1 %tmp9 to i32
+  br label %bb25
+
+bb11:                                             ; preds = %bb1
+  %tmp12 = zext i1 undef to i32
+  %tmp13 = xor i16 %tmp2, undef
+  %tmp14 = sext i16 %tmp13 to i64
+  %tmp15 = icmp ule i64 undef, %tmp14
+  %tmp16 = zext i1 %tmp15 to i32
+  %tmp17 = icmp ult i32 undef, %tmp16
+  %tmp18 = zext i1 %tmp17 to i32
+  %tmp19 = xor i16 %tmp, undef
+  %tmp20 = sext i16 %tmp19 to i64
+  %tmp21 = icmp ule i64 undef, %tmp20
+  %tmp22 = zext i1 %tmp21 to i32
+  %tmp23 = icmp ult i32 undef, %tmp22
+  %tmp24 = zext i1 %tmp23 to i32
+  br label %bb25
+
+bb25:                                             ; preds = %bb11, %bb3
+  %tmp26 = phi i32 [ %tmp24, %bb11 ], [ %tmp10, %bb3 ]
+  %tmp27 = phi i32 [ %tmp18, %bb11 ], [ %tmp7, %bb3 ]
+  %tmp28 = phi i32 [ %tmp12, %bb11 ], [ %tmp4, %bb3 ]
+  %tmp29 = phi i16 [ %tmp19, %bb11 ], [ %tmp8, %bb3 ]
+  %tmp30 = phi i16 [ %tmp13, %bb11 ], [ %tmp5, %bb3 ]
+  %tmp31 = and i32 undef, %tmp26
+  %tmp32 = and i32 %tmp31, %tmp27
+  %tmp33 = and i32 %tmp32, %tmp28
+  br i1 undef, label %bb34, label %bb1
+
+bb34:                                             ; preds = %bb25
+  %tmp35 = phi i32 [ %tmp33, %bb25 ]
+  br label %bb36
+
+bb36:                                             ; preds = %bb34
+  store i32 %tmp35, i32* @d, align 4
+  ret void
+}
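
For context, a minimal standalone sketch (hypothetical IR, not part of the patch) of the invariant the new insert point preserves: LLVM IR requires all PHI nodes of a basic block to be grouped at its start, so the trunc emitted for the min-bitwidth root has to go at the block's first insertion point (after the last PHI) rather than directly after a PHI root.

; Hypothetical example, not from the patch: the trunc must follow the last PHI.
define i16 @phi_trunc_sketch(i1 %c, i32 %a, i32 %b) {
entry:
  br i1 %c, label %left, label %right

left:
  br label %merge

right:
  br label %merge

merge:
  %x = phi i32 [ %a, %left ], [ %b, %right ]
  %y = phi i32 [ %b, %left ], [ %a, %right ]
  ; Emitting the trunc here, i.e. at getFirstInsertionPt() after the last PHI,
  ; is valid; placing it between %x and %y would fail the IR verifier with
  ; "PHI nodes not grouped at top of basic block!".
  %t = trunc i32 %x to i16
  ret i16 %t
}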