diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2660,12 +2660,14 @@
     if (TE->Scalars.size() != VF) {
       if (TE->ReuseShuffleIndices.size() == VF) {
         // Need to reorder the reuses masks of the operands with smaller VF to
-        // be able to find the math between the graph nodes and scalar
+        // be able to find the match between the graph nodes and scalar
         // operands of the given node during vectorization/cost estimation.
         // Build a list of such operands for future reordering.
         assert(all_of(TE->UserTreeIndices,
-                      [VF](const EdgeInfo &EI) {
-                        return EI.UserTE->Scalars.size() == VF;
+                      [VF, &TE](const EdgeInfo &EI) {
+                        return EI.UserTE->Scalars.size() == VF ||
+                               EI.UserTE->Scalars.size() ==
+                                   TE->Scalars.size();
                       }) &&
               "All users must be of VF size.");
         SmallOperandsToReorder.insert(TE.get());
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; ModuleID = 'repro1.ll'
+; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s
+
+define i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND15_PREHEADER:%.*]]
+; CHECK:       for.cond15.preheader:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       for.cond15:
+; CHECK-NEXT:    br label [[IF_END_1:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[FOR_COND15:%.*]]
+; CHECK:       for.end39:
+; CHECK-NEXT:    switch i32 undef, label [[DO_BODY:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB195:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+; CHECK-NEXT:    [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.bb195:
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       do.body:
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
+; CHECK-NEXT:    ret i32 undef
+; CHECK:       if.end.1:
+; CHECK-NEXT:    br label [[FOR_COND15_1:%.*]]
+; CHECK:       for.cond15.1:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
+;
+entry:
+  %conv = sitofp i32 undef to double
+  %conv2 = sitofp i32 undef to double
+  br label %for.cond15.preheader
+
+for.cond15.preheader:                             ; preds = %for.cond15.1, %entry
+  br label %if.end
+
+for.cond15:                                       ; preds = %if.end
+  br label %if.end.1
+
+if.end:                                           ; preds = %for.cond15.preheader
+  br label %for.cond15
+
+for.end39:                                        ; preds = %for.cond15.1
+  switch i32 undef, label %do.body [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb195
+  ]
+
+sw.bb:                                            ; preds = %for.end39
+  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+  %0 = load double, double* %arrayidx43, align 8
+  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+  %1 = load double, double* %arrayidx45, align 8
+  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+  %2 = load double, double* %arrayidx51, align 8
+  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+  %3 = load double, double* %arrayidx58, align 8
+  %mul = fmul double undef, %conv2
+  %mul109 = fmul double undef, %conv
+  %mul143 = fmul double %0, %mul
+  %4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
+  %mul154 = fmul double %1, %mul109
+  %5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
+  %mul172 = fmul double %3, %mul
+  %6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
+  %mul183 = fmul double %2, %mul109
+  %7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
+  br label %sw.epilog
+
+sw.bb195:                                         ; preds = %for.end39
+  br label %sw.epilog
+
+do.body:                                          ; preds = %for.end39
+  unreachable
+
+sw.epilog:                                        ; preds = %sw.bb195, %sw.bb
+  %x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
+  %x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
+  %x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
+  %x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
+  ret i32 undef
+
+if.end.1:                                         ; preds = %for.cond15
+  br label %for.cond15.1
+
+for.cond15.1:                                     ; preds = %if.end.1
+  br i1 undef, label %for.end39, label %for.cond15.preheader
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)