Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4909,8 +4909,14 @@
   // sign extend the extracted values below.
   auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
   if (MinBWs.count(ScalarRoot)) {
-    if (auto *I = dyn_cast<Instruction>(VectorRoot))
-      Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
+      // A PHI root may be followed by more PHIs, and no other instruction may
+      // sit between them; insert at the block's first insertion point instead.
+      if (isa<PHINode>(I))
+        Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
+      else
+        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+    }
     auto BundleWidth = VectorizableTree[0]->Scalars.size();
     auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
     auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
Index: llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: br label [[BB1:%.*]]
 ; CHECK: bb1:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP12:%.*]], [[BB25:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
 ; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
 ; CHECK: bb3:
 ; CHECK-NEXT: [[TMP4:%.*]] = zext i1 undef to i32
@@ -32,12 +32,12 @@
 ; CHECK: bb25:
 ; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[TMP12]], [[BB11]] ], [ [[TMP4]], [[BB3]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
-; CHECK-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
-; CHECK-NEXT: [[TMP12]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
 ; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
 ; CHECK-NEXT: [[TMP31:%.*]] = and i32 undef, [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
 ; CHECK-NEXT: [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 ; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], [[TMP16]]
 ; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], [[TMP28]]