Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -196,10 +196,6 @@
   Instruction *getExactFPInst() { return ExactFPMathInst; }

-  bool canVectorizeFPMath(const LoopVectorizeHints &Hints,
-                          bool HintsAllowReordering) const {
-    return !ExactFPMathInst || Hints.allowReordering(HintsAllowReordering);
-  }
-
   unsigned getNumRuntimePointerChecks() const {
     return NumRuntimePointerChecks;
   }
@@ -257,6 +253,12 @@
   /// If false, good old LV code.
   bool canVectorize(bool UseVPlanNativePath);

+  /// Returns true if it is legal to vectorize the FP math operations in this
+  /// loop. Vectorizing is legal if we allow reordering of FP operations, or if
+  /// we can use in-order reductions.
+  bool canVectorizeFPMath(bool EnableStrictReductions,
+                          bool HintsAllowReordering);
+
   /// Return true if we can vectorize this loop while folding its tail by
   /// masking, and mark all respective loads/stores for masking.
   /// This object's state is only modified iff this function returns true.
Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -857,6 +857,35 @@
   return true;
 }

+bool LoopVectorizationLegality::canVectorizeFPMath(
+    bool EnableStrictReductions, bool HintsAllowReordering) {
+
+  // First check whether there is any exact FP math, or whether we are
+  // allowed to reassociate FP operations.
+  if (!Requirements->getExactFPInst() ||
+      Hints->allowReordering(HintsAllowReordering))
+    return true;
+
+  if (!EnableStrictReductions)
+    return false;
+
+  // At this point the loop has exact FP math and we may not reorder it.
+  // First check whether there are any exact FP induction variables, which
+  // we cannot vectorize.
+  if (any_of(getInductionVars(), [&](auto &Induction) -> bool {
+        InductionDescriptor IndDesc = Induction.second;
+        return IndDesc.getExactFPMathInst();
+      }))
+    return false;
+
+  // We can now only vectorize if all reductions with exact FP math also
+  // have the isOrdered flag set, which indicates that we can move the
+  // reduction operations in-loop.
+  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
+    RecurrenceDescriptor RdxDesc = Reduction.second;
+    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
+  }));
+}
+
 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
   Value *In0 = const_cast<Value *>(V);
   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
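For readers reviewing the legality change: the whole question exists because floating-point addition is not associative, so a reordered (wide) reduction can produce a different result than the scalar loop. A minimal standalone C++ illustration of the effect (illustrative only, not part of the patch):

    #include <cstdio>

    int main() {
      // In-order sum, as the scalar loop computes it: each 1.0 is absorbed,
      // because 1e16 + 1.0 rounds back to 1e16 in double precision.
      double InOrder = 1e16;
      for (int I = 0; I < 8; ++I)
        InOrder += 1.0;

      // Reassociated sum, as a reordered reduction might compute it: the
      // small terms are combined first, so they survive the final add.
      double Partial = 0.0;
      for (int I = 0; I < 8; ++I)
        Partial += 1.0;
      double Reassoc = 1e16 + Partial;

      std::printf("in-order:     %.1f\n", InOrder); // 10000000000000000.0
      std::printf("reassociated: %.1f\n", Reassoc); // 10000000000000008.0
      return 0;
    }

The new predicate therefore only admits such loops when the hints permit reordering, or when every exact-FP reduction can be lowered in-order.
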
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9938,7 +9938,7 @@
     return false;
   }

-  if (!Requirements.canVectorizeFPMath(Hints, HintsAllowReordering)) {
+  if (!LVL.canVectorizeFPMath(EnableStrictReductions, HintsAllowReordering)) {
     ORE->emit([&]() {
       auto *ExactFPMathInst = Requirements.getExactFPInst();
       return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -hints-allow-reordering=false -S | FileCheck %s -check-prefix=CHECK-NO-REORDER
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -hints-allow-reordering=false -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS

+; CHECK-REMARKS: vectorized loop (vectorization width: 8, interleaved count: 1)
 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_strict
 ; CHECK: vector.body:
@@ -10,9 +11,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -30,6 +28,7 @@
   ret float %add
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 8, interleaved count: 4)
 define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_strict_unroll
 ; CHECK: vector.body:
@@ -46,9 +45,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX4]], %middle.block ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -66,6 +62,7 @@
   ret float %add
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 2)
 define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_strict_interleave
 ; CHECK: entry
@@ -90,9 +87,6 @@
 ; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI2]], <vscale x 4 x float> %[[MGATHER2]])
 ; CHECK: for.end
 ; CHECK: ret void
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict_interleave
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   %arrayidxa = getelementptr inbounds float, float* %a, i64 1
   %a1 = load float, float* %a, align 4
   %a2 = load float, float* %arrayidxa, align 4
   br label %for.body

@@ -120,6 +114,7 @@
   ret void
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 1)
 define float @fadd_invariant(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_invariant
 ; CHECK: vector.body
@@ -133,9 +128,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[EXIT_PHI]], %for.end.loopexit ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_invariant
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   %arrayidx = getelementptr inbounds float, float* %a, i64 1
   %0 = load float, float* %arrayidx, align 4
@@ -160,6 +152,7 @@
   ret float %res
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 1)
 define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_conditional
 ; CHECK: vector.body
@@ -182,9 +175,6 @@
 ; CHECK: for.end
 ; CHECK: %[[RDX_PHI:.*]] = phi float [ %[[FADD]], %for.inc ], [ %[[RDX]], %middle.block ]
 ; CHECK: ret float %[[RDX_PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_conditional
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -214,30 +204,10 @@
 }

 ; Negative test - loop contains multiple fadds which we cannot safely reorder
-; Note: This test vectorizes the loop with a non-strict implementation, which reorders the FAdd operations.
-; This is happening because we are using hints, where allowReordering returns true.
+; CHECK-REMARKS: loop not vectorized: cannot prove it is safe to reorder floating-point operations
 define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
 ; CHECK-LABEL: @fadd_multiple
-; CHECK: vector.body
-; CHECK: %[[PHI:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> undef, float -0.000000e+00, i32 0), <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
-; CHECK: %[[VEC_LOAD1:.*]] = load <vscale x 8 x float>,
-; CHECK: %[[VEC_FADD1:.*]] = fadd <vscale x 8 x float> %[[PHI]], %[[VEC_LOAD1]]
-; CHECK: %[[VEC_LOAD2:.*]] = load <vscale x 8 x float>,
-; CHECK: %[[VEC_FADD2]] = fadd <vscale x 8 x float> %[[VEC_FADD1]], %[[VEC_LOAD2]]
-; CHECK: middle.block
-; CHECK: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> %[[VEC_FADD2]])
-; CHECK: for.body
-; CHECK: %[[SUM:.*]] = phi float [ %bc.merge.rdx, %scalar.ph ], [ %[[FADD2:.*]], %for.body ]
-; CHECK: %[[LOAD1:.*]] = load float, float*
-; CHECK: %[[FADD1:.*]] = fadd float %[[SUM]], %[[LOAD1]]
-; CHECK: %[[LOAD2:.*]] = load float, float*
-; CHECK: %[[FADD2]] = fadd float %[[FADD1]], %[[LOAD2]]
-; CHECK: for.end
-; CHECK: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ]
-; CHECK: ret float %[[RET]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_multiple
-; CHECK-NO-REORDER-NOT: vector.body
+; CHECK-NOT: vector.body
 entry:
   br label %for.body
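For intuition about what the @llvm.vector.reduce.fadd calls in the checks above compute: per the LangRef, when the call carries no reassociation flags the reduction is a sequential, left-to-right fold of the lanes starting from the scalar start value, which is what makes the in-loop lowering exact. A rough C++ model of that semantics (illustrative only; the names are invented):

    #include <cstddef>

    // Model of the ordered semantics of llvm.vector.reduce.fadd(start, v)
    // when the call has no reassociation flags: a strict left-to-right
    // fold of the lanes onto the running scalar sum.
    float OrderedReduceFAdd(float Start, const float *Lanes, size_t VF) {
      float Acc = Start;
      for (size_t I = 0; I < VF; ++I)
        Acc += Lanes[I]; // same association order as the scalar loop
      return Acc;
    }

    // In-loop reduction as the vectorized body uses it: one ordered fold
    // per vector iteration; remainder iterations run in a scalar epilogue.
    float StrictSum(const float *A, size_t N, size_t VF) {
      float Sum = 0.0f;
      size_t I = 0;
      for (; I + VF <= N; I += VF)
        Sum = OrderedReduceFAdd(Sum, &A[I], VF);
      for (; I < N; ++I) // scalar epilogue
        Sum += A[I];
      return Sum;
    }
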
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions -hints-allow-reordering=false -S | FileCheck %s -check-prefix=CHECK-NO-REORDER
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions -hints-allow-reordering=false -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS

+; CHECK-REMARKS: vectorized loop (vectorization width: 8, interleaved count: 1)
 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_strict
 ; CHECK: vector.body:
@@ -10,9 +11,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -30,6 +28,7 @@
   ret float %add
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 8, interleaved count: 4)
 define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-LABEL: @fadd_strict_unroll
 ; CHECK: vector.body:
@@ -46,9 +45,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX4]], %middle.block ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -74,6 +70,7 @@
 ; }
 ; return sum;

+; CHECK-REMARKS: vectorized loop (vectorization width: 8, interleaved count: 4)
 define float @fadd_strict_unroll_last_val(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_strict_unroll_last_val
 ; CHECK: vector.body
@@ -98,9 +95,6 @@
 ; CHECK: for.end
 ; CHECK: %[[SUM_LCSSA:.*]] = phi float [ %[[FADD_LCSSA]], %for.cond.cleanup ], [ 0.000000e+00, %entry ]
 ; CHECK: ret float %[[SUM_LCSSA]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll_last_val
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   %cmp = icmp sgt i64 %n, 0
   br i1 %cmp, label %for.body, label %for.end

@@ -126,6 +120,7 @@
   ret float %sum.lcssa
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 1)
 define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_strict_interleave
 ; CHECK: entry
@@ -142,9 +137,6 @@
 ; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[VEC_PHI1]], <4 x float> %[[STRIDED2]])
 ; CHECK: for.end
 ; CHECK: ret void
-
-; CHECK-NO-REORDER-LABEL: @fadd_strict_interleave
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   %arrayidxa = getelementptr inbounds float, float* %a, i64 1
   %a1 = load float, float* %a, align 4
   %a2 = load float, float* %arrayidxa, align 4
   br label %for.body

@@ -172,6 +164,7 @@
   ret void
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 1)
 define float @fadd_invariant(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_invariant
 ; CHECK: vector.body
@@ -185,9 +178,6 @@
 ; CHECK: for.end
 ; CHECK: %[[PHI:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[EXIT_PHI]], %for.end.loopexit ]
 ; CHECK: ret float %[[PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_invariant
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   %arrayidx = getelementptr inbounds float, float* %a, i64 1
   %0 = load float, float* %arrayidx, align 4
@@ -212,6 +202,7 @@
   ret float %res
 }

+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 1)
 define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
 ; CHECK-LABEL: @fadd_conditional
 ; CHECK: vector.body:
@@ -239,9 +230,6 @@
 ; CHECK: for.end
 ; CHECK: %[[RDX_PHI:.*]] = phi float [ %[[FADD]], %for.inc ], [ %[[RDX]], %middle.block ]
 ; CHECK: ret float %[[RDX_PHI]]
-
-; CHECK-NO-REORDER-LABEL: @fadd_conditional
-; CHECK-NO-REORDER-NOT: vector.body
 entry:
   br label %for.body

@@ -271,6 +259,7 @@
 }

"llvm.loop.vectorize.predicate.enable" attribute +; CHECK-REMARKS: vectorized loop (vectorization width: 2, interleaved count: 1) define float @fadd_predicated(float* noalias nocapture %a, i64 %n) { ; CHECK-LABEL: @fadd_predicated ; CHECK: vector.ph @@ -286,9 +275,6 @@ ; CHECK: for.end: ; CHECK: %[[RES_PHI:.*]] = phi float [ %[[FADD:.*]], %for.body ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[RES_PHI]] - -; CHECK-NO-REORDER-LABEL: @fadd_predicated -; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -308,28 +294,10 @@ } ; Negative test - loop contains multiple fadds which we cannot safely reorder +; CHECK-REMARKS: loop not vectorized: cannot prove it is safe to reorder floating-point operations define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) { ; CHECK-LABEL: @fadd_multiple -; CHECK: vector.body -; CHECK: %[[PHI:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] -; CHECK: %[[VEC_LOAD1:.*]] = load <8 x float>, <8 x float> -; CHECK: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]] -; CHECK: %[[VEC_LOAD2:.*]] = load <8 x float>, <8 x float> -; CHECK: %[[VEC_FADD2]] = fadd <8 x float> %[[VEC_FADD1]], %[[VEC_LOAD2]] -; CHECK: middle.block -; CHECK: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %[[VEC_FADD2]]) -; CHECK: for.body -; CHECK: %[[SUM:.*]] = phi float [ %bc.merge.rdx, %scalar.ph ], [ %[[FADD2:.*]], %for.body ] -; CHECK: %[[LOAD1:.*]] = load float, float* -; CHECK: %[[FADD1:.*]] = fadd float %sum, %[[LOAD1]] -; CHECK: %[[LOAD2:.*]] = load float, float* -; CHECK: %[[FADD2]] = fadd float %[[FADD1]], %[[LOAD2]] -; CHECK: for.end -; CHECK: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ] -; CHECK: ret float %[[RET]] - -; CHECK-NO-REORDER-LABEL: @fadd_multiple -; CHECK-NO-REORDER-NOT: vector.body +; CHECK-NOT: vector.body entry: br label %for.body @@ -351,6 +319,121 @@ ret float %rdx } +; Tests with both a floating point reduction & induction, e.g. 
+;
+; float fp_iv_rdx_loop(float *values, float init, float * __restrict__ A, int N) {
+;   float fp_inc = 2.0;
+;   float x = init;
+;   float sum = 0.0;
+;   for (int i = 0; i < N; ++i) {
+;     A[i] = x;
+;     x += fp_inc;
+;     sum += values[i];
+;   }
+;   return sum;
+; }

+; Strict reduction could be performed in-loop, but ordered FP induction variables are not supported
+; CHECK-REMARKS: loop not vectorized: cannot prove it is safe to reorder floating-point operations
+define float @induction_and_reduction(float* nocapture readonly %values, float %init, float* noalias nocapture %A, i64 %N) {
+; CHECK-LABEL: @induction_and_reduction
+; CHECK-NOT: vector.body
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.015 = phi float [ 0.000000e+00, %entry ], [ %add3, %for.body ]
+  %x.014 = phi float [ %init, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %iv
+  store float %x.014, float* %arrayidx, align 4
+  %add = fadd float %x.014, 2.000000e+00
+  %arrayidx2 = getelementptr inbounds float, float* %values, i64 %iv
+  %0 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %sum.015, %0
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  ret float %add3
+}
+
+; As above, but with the FP induction being unordered (fast) the loop can be vectorized
+; CHECK-REMARKS: vectorized loop (vectorization width: 4, interleaved count: 2)
+define float @fast_induction_and_reduction(float* nocapture readonly %values, float %init, float* noalias nocapture %A, i64 %N) {
+; CHECK-LABEL: @fast_induction_and_reduction
+; CHECK: vector.ph
+; CHECK: %[[INDUCTION:.*]] = fadd fast <4 x float> {{.*}}, <float 0.000000e+00, float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>
+; CHECK: vector.body
+; CHECK: %[[RDX_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[FADD2:.*]], %vector.body ]
+; CHECK: %[[IND_PHI:.*]] = phi <4 x float> [ %[[INDUCTION]], %vector.ph ], [ %[[VEC_IND_NEXT:.*]], %vector.body ]
+; CHECK: %[[STEP_ADD:.*]] = fadd fast <4 x float> %[[IND_PHI]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+; CHECK: %[[LOAD1:.*]] = load <4 x float>, <4 x float>*
+; CHECK: %[[LOAD2:.*]] = load <4 x float>, <4 x float>*
+; CHECK: %[[FADD1:.*]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[RDX_PHI]], <4 x float> %[[LOAD1]])
+; CHECK: %[[FADD2]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[FADD1]], <4 x float> %[[LOAD2]])
+; CHECK: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[STEP_ADD]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+; CHECK: for.body
+; CHECK: %[[RDX_SUM_PHI:.*]] = phi float [ {{.*}}, %scalar.ph ], [ %[[FADD3:.*]], %for.body ]
+; CHECK: %[[IND_SUM_PHI:.*]] = phi fast float [ {{.*}}, %scalar.ph ], [ %[[ADD_IND:.*]], %for.body ]
+; CHECK: store float %[[IND_SUM_PHI]], float*
+; CHECK: %[[ADD_IND]] = fadd fast float %[[IND_SUM_PHI]], 2.000000e+00
+; CHECK: %[[LOAD3:.*]] = load float, float*
+; CHECK: %[[FADD3]] = fadd float %[[RDX_SUM_PHI]], %[[LOAD3]]
+; CHECK: for.end
+; CHECK: %[[RES_PHI:.*]] = phi float [ %[[FADD3]], %for.body ], [ %[[FADD2]], %middle.block ]
+; CHECK: ret float %[[RES_PHI]]
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.015 = phi float [ 0.000000e+00, %entry ], [ %add3, %for.body ]
+  %x.014 = phi fast float [ %init, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %iv
+  store float %x.014, float* %arrayidx, align 4
+  %add = fadd fast float %x.014, 2.000000e+00
+  %arrayidx2 = getelementptr inbounds float, float* %values, i64 %iv
+  %0 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %sum.015, %0
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  ret float %add3
+}
+
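The split between the two tests above comes down to how a widened FP induction must be materialized. A scalar C++ sketch of the difference (illustrative only, with invented names; not part of the test file):

    // A widened FP induction materializes lane I of the IV as
    // Init + I * Step, but the strict scalar loop computes
    // ((Init + Step) + Step) + ... instead. The two forms are only
    // interchangeable under reassociation, hence the IV's fadd must
    // carry 'fast' (or at least reassociation) flags to be widened.
    float WidenedIVLane(float Init, float Step, unsigned I) {
      return Init + static_cast<float>(I) * Step; // reassociated form
    }

    float ScalarIVAtIteration(float Init, float Step, unsigned I) {
      float X = Init;
      for (unsigned J = 0; J < I; ++J)
        X += Step; // the order the strict scalar loop actually performs
      return X;
    }
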
+; The FP induction is fast, but here we can't vectorize because only one of the reductions is an FAdd that can be performed in-loop
+; CHECK-REMARKS: loop not vectorized: cannot prove it is safe to reorder floating-point operations
+define float @fast_induction_unordered_reduction(float* nocapture readonly %values, float %init, float* noalias nocapture %A, float* noalias nocapture %B, i64 %N) {
+; CHECK-LABEL: @fast_induction_unordered_reduction
+; CHECK-NOT: vector.body
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum2.023 = phi float [ 3.000000e+00, %entry ], [ %mul, %for.body ]
+  %sum.022 = phi float [ 0.000000e+00, %entry ], [ %add3, %for.body ]
+  %x.021 = phi float [ %init, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %iv
+  store float %x.021, float* %arrayidx, align 4
+  %add = fadd fast float %x.021, 2.000000e+00
+  %arrayidx2 = getelementptr inbounds float, float* %values, i64 %iv
+  %0 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %sum.022, %0
+  %mul = fmul float %sum2.023, %0
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  %add6 = fadd float %add3, %mul
+  ret float %add6
+}
+
 !0 = distinct !{!0, !4, !7, !9}
 !1 = distinct !{!1, !4, !8, !9}
 !2 = distinct !{!2, !5, !7, !9}
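One more piece of intuition for the final negative test: the new all_of check in LoopVectorizationLegality.cpp rejects the loop because the strict fmul recurrence has exact FP math but no ordered in-loop lowering, even though the fadd reduction on its own would be fine. A hypothetical, simplified C++ mirror of that predicate (names invented for illustration; not the actual LLVM API):

    #include <vector>

    // HasExactFPMath: the reduction ops carry no reassociation flags.
    // IsOrdered: LV knows an in-loop (ordered) lowering for the reduction.
    struct ReductionSummary {
      bool HasExactFPMath;
      bool IsOrdered;
    };

    bool canVectorizeExactFPReductions(const std::vector<ReductionSummary> &Rdxs) {
      for (const ReductionSummary &R : Rdxs)
        if (R.HasExactFPMath && !R.IsOrdered)
          return false; // e.g. the strict fmul in the final test
      return true;      // every exact-FP reduction can stay in-order
    }

A single unordered exact-FP recurrence is therefore enough to keep the whole loop scalar, which is exactly what the CHECK-NOT and the remark in @fast_induction_unordered_reduction verify.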