Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4909,8 +4909,14 @@
   // sign extend the extracted values below.
   auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
   if (MinBWs.count(ScalarRoot)) {
-    if (auto *I = dyn_cast<Instruction>(VectorRoot))
-      Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
+      // If the vectorized root is a PHI, insert at the block's first insertion
+      // point: non-PHI instructions must not be placed between PHI nodes.
+      if (isa<PHINode>(I))
+        Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
+      else
+        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    }
     auto BundleWidth = VectorizableTree[0]->Scalars.size();
     auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
     auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
Index: llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -1,4 +1,5 @@
-;RUN: opt < %s -disable-verify -slp-vectorizer -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt < %s -slp-vectorizer -S | FileCheck %s
 target triple = "aarch64-unknown-linux-gnu"
 
 @d = internal unnamed_addr global i32 5, align 4
@@ -7,7 +8,7 @@
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP12:%.*]], [[BB25:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
 ; CHECK-NEXT:    br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[I4:%.*]] = zext i1 undef to i32
@@ -27,12 +28,12 @@
 ; CHECK:       bb25:
 ; CHECK-NEXT:    [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
-; CHECK-NEXT:    [[TMP12]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
 ; CHECK-NEXT:    [[I31:%.*]] = and i32 undef, [[TMP14]]
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 ; CHECK-NEXT:    [[I32:%.*]] = and i32 [[I31]], [[TMP16]]
 ; CHECK-NEXT:    [[I33:%.*]] = and i32 [[I32]], [[I28]]