[SLP] Take the "not scheduled" insert-point path for gather nodes as well.

The guard in front of the insert-point selection is reordered:
doesNotNeedToSchedule(E->Scalars) is now sufficient on its own, including for
entries whose State is TreeEntry::NeedToGather, while the
all_of(E->Scalars, isVectorLikeInstWithConstOps) alternative remains limited
to entries that are not gathers.

The new test reduces eight extractelement results (four taken from a poison
vector, four from zeroinitializer) through a mul chain located in another
basic block.

NOTE(review): the constant operand on the llvm.vector.reduce.mul CHECK line was
lost from the copy this patch was restored from. It has been rebuilt from the
extractelement sources in the test (4 x poison followed by 4 x 0); the lane
order is an assumption, so regenerate the assertions with
utils/update_test_checks.py before committing.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9088,8 +9088,8 @@
 
   // Set the insert point to the beginning of the basic block if the entry
   // should not be scheduled.
-  if (E->State != TreeEntry::NeedToGather &&
-      (doesNotNeedToSchedule(E->Scalars) ||
+  if (doesNotNeedToSchedule(E->Scalars) ||
+      (E->State != TreeEntry::NeedToGather &&
        all_of(E->Scalars, isVectorLikeInstWithConstOps))) {
     Instruction *InsertInst;
     if ((E->getOpcode() == Instruction::GetElementPtr &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll b/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no-scheduled-instructions.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=slp-vectorizer -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0>)
+; CHECK-NEXT:    call void @f(i32 noundef [[TMP1]])
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret void
+;
+  %i27 = extractelement <4 x i32> poison, i64 0
+  %i28 = extractelement <4 x i32> poison, i64 1
+  %i29 = extractelement <4 x i32> poison, i64 2
+  %i30 = extractelement <4 x i32> poison, i64 3
+  %i31 = extractelement <4 x i32> zeroinitializer, i64 0
+  %i32 = extractelement <4 x i32> zeroinitializer, i64 1
+  %i33 = extractelement <4 x i32> zeroinitializer, i64 2
+  %i34 = extractelement <4 x i32> zeroinitializer, i64 3
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %i11 = mul nsw i32 %i28, %i27
+  %i12 = mul nsw i32 %i11, %i29
+  %i13 = mul nsw i32 %i12, %i30
+  %i14 = mul nsw i32 %i13, %i31
+  %i15 = mul nsw i32 %i14, %i32
+  %i16 = mul nsw i32 %i15, %i33
+  %i17 = mul nsw i32 %i16, %i34
+  call void @f(i32 noundef %i17)
+  br label %bb2
+
+bb2:
+  ret void
+}
+
+declare void @f(i32)