Patch summary: the loop in SLPVectorizer.cpp that checks whether extract
indices are consecutive now skips elements that are undef values or whose mask
entry is UndefMaskElem, and only compares against the previous element when
that element is not undef either. A regression test, extracts-with-undefs.ll,
is added.
NOTE(review): the isa<>/cast<> template arguments and the two constant-vector
operands in the CHECK lines were missing from the extracted text and have been
restored by inference -- confirm against the upstream commit before applying.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4348,6 +4348,10 @@
   for (auto *V : VL) {
     ++Idx;
 
+    // Need to exclude undefs from analysis.
+    if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
+      continue;
+
     // Reached the start of a new vector registers.
     if (Idx % EltsPerVector == 0) {
       AllConsecutive = true;
@@ -4357,9 +4361,11 @@
     // Check all extracts for a vector register on the target directly
     // extract values in order.
     unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
-    unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
-    AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
-                      CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+    if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != UndefMaskElem) {
+      unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
+      AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
+                        CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+    }
 
     if (AllConsecutive)
       continue;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    [[ADD8_I_I:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
+; CHECK-NEXT:    br i1 false, label [[BODY]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
+; CHECK:       if.then135.i:
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x double> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    br label [[IF_END209_I]]
+; CHECK:       if.end209.i:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %body
+
+body:
+  %phi1 = phi double [ 0.000000e+00, %entry ], [ 0.000000e+00, %body ]
+  %phi2 = phi double [ 0.000000e+00, %entry ], [ 0.000000e+00, %body ]
+  %mul.i478.i = fmul fast double %phi1, 0.000000e+00
+  %mul7.i485.i = fmul fast double undef, 0.000000e+00
+  %add8.i.i = fadd fast double %mul.i478.i, %mul7.i485.i
+  %cmp42.i = fcmp fast ole double %add8.i.i, 0.000000e+00
+  br i1 false, label %body, label %exit
+
+exit:
+  br i1 false, label %if.then135.i, label %if.end209.i
+
+if.then135.i:
+  %cmp145.i = fcmp fast olt double %phi1, 0.000000e+00
+  %0 = select i1 false, double 0.000000e+00, double 0.000000e+00
+  %cmp152.i = fcmp fast olt double %phi2, 0.000000e+00
+  %1 = select i1 %cmp152.i, double 0.000000e+00, double 0.000000e+00
+  %mul166.i = fmul fast double 0.000000e+00, %0
+  %mul177.i = fmul fast double %mul166.i, 0.000000e+00
+  %add178.i = fadd fast double %mul177.i, 0.000000e+00
+  %mul181.i = fmul fast double 0.000000e+00, %1
+  %mul182.i = fmul fast double %mul181.i, 0.000000e+00
+  %add183.i = fadd fast double %mul182.i, 0.000000e+00
+  br label %if.end209.i
+
+if.end209.i:
+  %drdys.1.i = phi double [ %add183.i, %if.then135.i ], [ 0.000000e+00, %exit ]
+  %dbdxs.1.i = phi double [ %add178.i, %if.then135.i ], [ 0.000000e+00, %exit ]
+  ret void
+}