diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2660,12 +2660,14 @@
     if (TE->Scalars.size() != VF) {
       if (TE->ReuseShuffleIndices.size() == VF) {
         // Need to reorder the reuses masks of the operands with smaller VF to
-        // be able to find the math between the graph nodes and scalar
+        // be able to find the match between the graph nodes and scalar
         // operands of the given node during vectorization/cost estimation.
         // Build a list of such operands for future reordering.
         assert(all_of(TE->UserTreeIndices,
-                      [VF](const EdgeInfo &EI) {
-                        return EI.UserTE->Scalars.size() == VF;
+                      [VF, &TE](const EdgeInfo &EI) {
+                        return EI.UserTE->Scalars.size() == VF ||
+                               EI.UserTE->Scalars.size() ==
+                                   TE->Scalars.size();
                       }) &&
               "All users must be of VF size.");
         SmallOperandsToReorder.insert(TE.get());
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; ModuleID = 'repro1.ll'
+; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s
+
+define i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND15_PREHEADER:%.*]]
+; CHECK:       for.cond15.preheader:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       for.cond15:
+; CHECK-NEXT:    br label [[IF_END_1:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[FOR_COND15:%.*]]
+; CHECK:       for.end39:
+; CHECK-NEXT:    switch i32 undef, label [[DO_BODY:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB195:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+; CHECK-NEXT:    [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.bb195:
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       do.body:
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
+; CHECK-NEXT:    ret i32 undef
+; CHECK:       if.end.1:
+; CHECK-NEXT:    br label [[FOR_COND15_1:%.*]]
+; CHECK:       for.cond15.1:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
+;
+entry:
+  %conv = sitofp i32 undef to double
+  %conv2 = sitofp i32 undef to double
+  br label %for.cond15.preheader
+
+for.cond15.preheader:                             ; preds = %for.cond15.1, %entry
+  br label %if.end
+
+for.cond15:                                       ; preds = %if.end
+  br label %if.end.1
+
+if.end:                                           ; preds = %for.cond15.preheader
+  br label %for.cond15
+
+for.end39:                                        ; preds = %for.cond15.1
+  switch i32 undef, label %do.body [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb195
+  ]
+
+sw.bb:                                            ; preds = %for.end39
+  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+  %0 = load double, double* %arrayidx43, align 8
+  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+  %1 = load double, double* %arrayidx45, align 8
+  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+  %2 = load double, double* %arrayidx51, align 8
+  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+  %3 = load double, double* %arrayidx58, align 8
+  %mul = fmul double undef, %conv2
+  %mul109 = fmul double undef, %conv
+  %mul143 = fmul double %0, %mul
+  %4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
+  %mul154 = fmul double %1, %mul109
+  %5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
+  %mul172 = fmul double %3, %mul
+  %6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
+  %mul183 = fmul double %2, %mul109
+  %7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
+  br label %sw.epilog
+
+sw.bb195:                                         ; preds = %for.end39
+  br label %sw.epilog
+
+do.body:                                          ; preds = %for.end39
+  unreachable
+
+sw.epilog:                                        ; preds = %sw.bb195, %sw.bb
+  %x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
+  %x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
+  %x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
+  %x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
+  ret i32 undef
+
+if.end.1:                                         ; preds = %for.cond15
+  br label %for.cond15.1
+
+for.cond15.1:                                     ; preds = %if.end.1
+  br i1 undef, label %for.end39, label %for.cond15.preheader
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)