# Reviewer notes for this patch. These lines sit before the first file header, so they are
# not part of any hunk; the patch text that follows them is unchanged.
#
# 1) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp -- finalize(...), two hunks:
#    * Hunk 1 adds a local `bool ShouldExtendManyInputs`, initialised to
#      `InVectors.size() > 1`, right after `IsFinalized = true;` and before the
#      `if (Action)` block.
#    * Hunk 2 changes the `::addMask(CommonMask, ExtMask, ...)` call so that the
#      `ExtendingManyInputs` argument is that flag instead of the hard-coded `true`.
#      Net effect visible here: when InVectors holds at most one entry, addMask now
#      receives `false`.
#    * What `ExtendingManyInputs` changes inside addMask is not shown in this patch --
#      TODO confirm against the addMask definition.
#    * NOTE(review): the flag is sampled before the `if (Action)` block, and that block
#      branches on `InVectors.size() == 2`. If that block changes the size of InVectors,
#      the value passed to addMask reflects the size before that change -- confirm this
#      is intended.
#
# 2) llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll -- new file (hunk header: 54 lines):
#    * RUN line: opt -passes=slp-vectorizer, -mtriple=x86_64-unknown-linux-gnu,
#      -mcpu=znver4, output piped to FileCheck.
#    * The CHECK lines are marked as autogenerated by utils/update_test_checks.py;
#      regenerate them with that script rather than editing by hand.
#    * The CHECK lines expect the same instruction sequence as the input @testfunc
#      (three phis in block 1, one extractelement in block 6, eight phis in block 8).
#    * Presumably a regression test for the issue numbered in the file name -- verify
#      against the bug tracker.
#
# NOTE(review): in this copy the line breaks of the patch are collapsed, and the template
# argument lists in the C++ context lines look stripped (e.g. `function_ref &)> Action`
# has an unbalanced `)` and `>`). It will probably not apply as-is; regenerate the patch
# from the original change before applying.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7159,6 +7159,7 @@ finalize(ArrayRef ExtMask, unsigned VF = 0, function_ref &)> Action = {}) { IsFinalized = true; + bool ShouldExtendManyInputs = (InVectors.size() > 1); if (Action) { const PointerUnion &Vec = InVectors.front(); if (InVectors.size() == 2) { @@ -7179,7 +7180,7 @@ CommonMask.size())); Action(V, CommonMask); } - ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true); + ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/ShouldExtendManyInputs); if (CommonMask.empty()) return Cost; int Limit = CommonMask.size() * 2; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s + +define internal i32 @testfunc() { +; CHECK-LABEL: define internal i32 @testfunc +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: 1: +; CHECK-NEXT: [[TMP2:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ 0.000000e+00, [[TMP8:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ] +; CHECK-NEXT: br i1 false, label [[TMP8]], label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: br i1 false, label [[TMP6:%.*]], label [[TMP8]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> zeroinitializer, i64 0 +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 
0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi float [ [[TMP2]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP2]], [[TMP5]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi float [ [[TMP3]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP3]], [[TMP5]] ] +; CHECK-NEXT: br label [[TMP1]] +; + br label %1 + +1: ; preds = %8, %0 + %2 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + %3 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + %4 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + br i1 false, label %8, label %5 + +5: ; preds = %1 + br i1 false, label %6, label %8 + +6: ; preds = %5 + %7 = extractelement <8 x float> zeroinitializer, i64 0 + br label %8 + +8: ; preds = %6, %5, %1 + %9 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %10 = phi float [ %2, %6 ], [ 0.000000e+00, %1 ], [ %2, %5 ] + %11 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %12 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %13 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %14 = phi float [ %3, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %15 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %16 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ %3, %5 ] + br label %1 +}