Index: llvm/include/llvm/Analysis/IVDescriptors.h
===================================================================
--- llvm/include/llvm/Analysis/IVDescriptors.h
+++ llvm/include/llvm/Analysis/IVDescriptors.h
@@ -69,9 +69,11 @@
   RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
                        FastMathFlags FMF, Instruction *ExactFP, Type *RT,
-                       bool Signed, SmallPtrSetImpl<Instruction *> &CI)
+                       bool Signed, bool Ordered,
+                       SmallPtrSetImpl<Instruction *> &CI)
       : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
-        ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed) {
+        ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed),
+        IsOrdered(Ordered) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -227,6 +229,9 @@
   /// Returns true if all source operands of the recurrence are SExtInsts.
   bool isSigned() const { return IsSigned; }
 
+  /// Expose an ordered FP reduction to the instance users.
+  bool isOrdered() const { return IsOrdered; }
+
   /// Attempts to find a chain of operations from Phi to LoopExitInst that can
   /// be treated as a set of reductions instructions for in-loop reductions.
   SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
@@ -249,6 +254,8 @@
   Type *RecurrenceType = nullptr;
   // True if all source operands of the recurrence are SExtInsts.
   bool IsSigned = false;
+  // If this is an ordered reduction.
+  bool IsOrdered = false;
   // Instructions used for type-promoting the recurrence.
   SmallPtrSet<Instruction *, 8> CastInsts;
 };
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -389,6 +389,11 @@
 Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
                              RecurrenceDescriptor &Desc, Value *Src);
 
+/// Create an ordered reduction intrinsic using the given recurrence
+/// descriptor \p Desc.
+Value *createOrderedReduction(IRBuilderBase &B, RecurrenceDescriptor &Desc,
+                              Value *Src, Value *Start, Value *Predicate);
+
 /// Get the intersection (logical and) of all of the potential IR flags
 /// of each scalar operation (VL) that will be converted into a vector (I).
 /// If OpValue is non-null, we only consider operations similar to OpValue
Index: llvm/lib/Analysis/IVDescriptors.cpp
===================================================================
--- llvm/lib/Analysis/IVDescriptors.cpp
+++ llvm/lib/Analysis/IVDescriptors.cpp
@@ -189,6 +189,48 @@
   }
 }
 
+// Check if a given Phi node can be recognized as an ordered reduction for
+// vectorizing floating point operations without unsafe math.
+static bool checkOrderedReduction(RecurKind Kind, Instruction *Exit,
+                                  PHINode *Phi) {
+  // Currently only FAdd is supported.
+  if (Kind != RecurKind::FAdd)
+    return false;
+
+  bool IsOrdered = Exit->getOpcode() == Instruction::FAdd &&
+                   !cast<Instruction>(Exit)->hasAllowReassoc();
+
+  // If this comes from a Phi node, look through it.
+  if (auto *EIP = dyn_cast<PHINode>(Exit)) {
+    if (EIP->getNumIncomingValues() != 2)
+      return false;
+
+    auto *ChainVal = EIP->getIncomingValue(0) == Phi ? EIP->getIncomingValue(1)
+                                                     : EIP->getIncomingValue(0);
+
+    if (!isa<Instruction>(ChainVal))
+      return false;
+
+    Exit = cast<Instruction>(ChainVal);
+    IsOrdered = Exit->getOpcode() == Instruction::FAdd &&
+                !cast<Instruction>(Exit)->hasAllowReassoc();
+  }
+
+  // The only pattern accepted is the one in which the reduction PHI
+  // is used as one of the operands of the exit instruction.
+  auto *LHS = Exit->getOperand(0);
+  auto *RHS = Exit->getOperand(1);
+  IsOrdered = IsOrdered && ((LHS == Phi) || (RHS == Phi));
+
+  if (!IsOrdered)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
+                    << ", ExitInst: " << *Exit << "\n");
+
+  return true;
+}
+
 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
                                            Loop *TheLoop, FastMathFlags FuncFMF,
                                            RecurrenceDescriptor &RedDes,
@@ -416,6 +458,8 @@
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
+  const bool IsOrdered = checkOrderedReduction(Kind, ExitInstruction, Phi);
+
   if (Start != Phi) {
     // If the starting value is not the same as the phi node, we speculatively
     // looked through an 'and' instruction when evaluating a potential
@@ -470,7 +514,7 @@
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
                           ReduxDesc.getExactFPMathInst(), RecurrenceType,
-                          IsSigned, CastInsts);
+                          IsSigned, IsOrdered, CastInsts);
   RedDes = RD;
 
   return true;
Index: llvm/lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1070,6 +1070,22 @@
   return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
 }
 
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+                                    RecurrenceDescriptor &Desc, Value *Src,
+                                    Value *Start, Value *Predicate) {
+  auto Kind = Desc.getRecurrenceKind();
+  assert(Kind == RecurKind::FAdd && "Unexpected reduction kind");
+  assert(Src->getType()->isVectorTy() && "Expected a vector type");
+  assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+  // Predication is done by masking out the inactive elements of the vector
+  // source with a safe identity value.
+  auto *MaskedVec = B.CreateSelect(
+      Predicate, Src, Desc.getRecurrenceIdentity(Kind, Src->getType()));
+  return B.CreateFAddReduce(Start, MaskedVec);
+}
+
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
   auto *VecOp = dyn_cast<Instruction>(I);
   if (!VecOp)
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -325,6 +325,11 @@
     cl::desc("Prefer in-loop vector reductions, "
              "overriding the targets preference."));
 
+cl::opt<bool> EnableStrictReductions(
+    "enable-strict-reducs", cl::init(false), cl::Hidden,
+    cl::desc("Enable the vectorisation of loops with in-order (strict) "
+             "FP reductions"));
+
 static cl::opt<bool> PreferPredicatedReductionSelect(
     "prefer-predicated-reduction-select", cl::init(false), cl::Hidden,
     cl::desc(
@@ -4228,6 +4233,10 @@
     LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
   }
 }
 
+static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
+  return EnableStrictReductions && RdxDesc.isOrdered();
+}
+
 void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
   // Get it's reduction variable descriptor.
   assert(Legal->isReductionVariable(Phi) &&
@@ -4349,16 +4358,20 @@
   // accidentally cause an extra step back into the loop while debugging.
   setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
   {
-    // Floating-point operations should have some FMF to enable the reduction.
-    IRBuilderBase::FastMathFlagGuard FMFG(Builder);
-    Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
-    for (unsigned Part = 1; Part < UF; ++Part) {
-      Value *RdxPart = State.get(LoopExitInstDef, Part);
-      if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
-        ReducedPartRdx = Builder.CreateBinOp(
-            (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
-      } else {
-        ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
+    if (Cost->isInLoopReduction(Phi) && useOrderedReductions(RdxDesc))
+      ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
+    else {
+      // Floating-point operations should have some FMF to enable the reduction.
+      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
+      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+      for (unsigned Part = 1; Part < UF; ++Part) {
+        Value *RdxPart = State.get(LoopExitInstDef, Part);
+        if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+          ReducedPartRdx = Builder.CreateBinOp(
+              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
+        } else {
+          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
+        }
       }
     }
   }
@@ -6045,7 +6058,7 @@
     if (!Legal->isReductionVariable(PN))
       continue;
     RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[PN];
-    if (PreferInLoopReductions ||
+    if (PreferInLoopReductions || useOrderedReductions(RdxDesc) ||
         TTI.preferInLoopReduction(RdxDesc.getOpcode(),
                                   RdxDesc.getRecurrenceType(),
                                   TargetTransformInfo::ReductionFlags()))
@@ -7614,7 +7627,7 @@
     // If the target would prefer this reduction to happen "in-loop", then we
     // want to record it as such.
     unsigned Opcode = RdxDesc.getOpcode();
-    if (!PreferInLoopReductions &&
+    if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) &&
         !TTI.preferInLoopReduction(Opcode, Phi->getType(),
                                    TargetTransformInfo::ReductionFlags()))
       continue;
@@ -9101,8 +9114,10 @@
 void VPReductionRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Reduction being replicated.");
+  Value *PrevInChain = State.get(getChainOp(), 0);
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     RecurKind Kind = RdxDesc->getRecurrenceKind();
+    bool IsOrdered = useOrderedReductions(*RdxDesc);
     Value *NewVecOp = State.get(getVecOp(), Part);
     if (VPValue *Cond = getCondOp()) {
       Value *NewCond = State.get(Cond, Part);
@@ -9114,18 +9129,31 @@
       Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
       NewVecOp = Select;
     }
-    Value *NewRed =
-        createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
-    Value *PrevInChain = State.get(getChainOp(), Part);
+    Value *NewRed;
     Value *NextInChain;
+    if (IsOrdered) {
+      Value *C = ConstantInt::get(
+          IntegerType::getInt1Ty(State.Builder.getContext()), 1);
+      Value *MaskOfOnes =
+          State.Builder.CreateVectorSplat(State.VF, C, "broadcast");
+      NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp,
+                                      PrevInChain, MaskOfOnes);
+      PrevInChain = NewRed;
+    } else {
+      PrevInChain = State.get(getChainOp(), Part);
+      NewRed = createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
+    }
     if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
       NextInChain = createMinMaxOp(State.Builder, RdxDesc->getRecurrenceKind(),
                                    NewRed, PrevInChain);
     } else {
-      NextInChain = State.Builder.CreateBinOp(
-          (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
-          PrevInChain);
+      if (IsOrdered)
+        NextInChain = NewRed;
+      else
+        NextInChain = State.Builder.CreateBinOp(
+            (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
+            PrevInChain);
     }
     State.set(this, NextInChain, Part);
   }
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -0,0 +1,163 @@
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reducs -S | FileCheck %s -check-prefix=CHECK
+
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-LABEL: @fadd_strict
+; CHECK: vector.body:
+; CHECK: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
+; CHECK: %[[LOAD:.*]] = load <8 x float>, <8 x float>*
+; CHECK: %[[MASKED_VEC:.*]] = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> %[[LOAD]], <8 x float> zeroinitializer
+; CHECK: %[[RDX]] = call float @llvm.vector.reduce.fadd.v8f32(float %[[VEC_PHI]], <8 x float> %[[MASKED_VEC]])
+; CHECK: for.end
+; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ]
+; CHECK: ret float %[[PHI]]
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret float %add
+}
+
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-LABEL: @fadd_strict_unroll
+; CHECK: vector.body:
+; CHECK: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX1:.*]], %vector.body ]
+; CHECK: %[[VEC_PHI2:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX2:.*]], %vector.body ]
+; CHECK: %[[LOAD1:.*]] = load <8 x float>, <8 x float>*
+; CHECK: %[[LOAD2:.*]] = load <8 x float>, <8 x float>*
+; CHECK: %[[MASKED_VEC1:.*]] = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> %[[LOAD1]], <8 x float> zeroinitializer
+; CHECK: %[[RDX1:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float %[[VEC_PHI1]], <8 x float> %[[MASKED_VEC1]])
+; CHECK: %[[MASKED_VEC2:.*]] = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> %[[LOAD2]], <8 x float> zeroinitializer
+; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.v8f32(float %[[RDX1]], <8 x float> %[[MASKED_VEC2]])
+; CHECK: for.end
+; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX2]], %middle.block ]
+; CHECK: ret float %[[PHI]]
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret float %add
+}
+
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @fadd_strict_interleave
+; CHECK: entry
+; CHECK: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
+; CHECK: %[[LOAD1:.*]] = load float, float* %a
+; CHECK: %[[LOAD2:.*]] = load float, float* %[[ARRAYIDX]]
+; CHECK: vector.body
+; CHECK: %[[VEC_PHI1:.*]] = phi float [ %[[LOAD2]], %vector.ph ], [ %[[RDX2:.*]], %vector.body ]
+; CHECK: %[[VEC_PHI2:.*]] = phi float [ %[[LOAD1]], %vector.ph ], [ %[[RDX1:.*]], %vector.body ]
+; CHECK: %[[WIDE_LOAD:.*]] = load <8 x float>, <8 x float>*
+; CHECK: %[[STRIDED1:.*]] = shufflevector <8 x float> %[[WIDE_LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: %[[STRIDED2:.*]] = shufflevector <8 x float> %[[WIDE_LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: %[[MASKED_VEC1:.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %[[STRIDED1]], <4 x float> zeroinitializer
+; CHECK: %[[RDX1]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[VEC_PHI2]], <4 x float> %[[MASKED_VEC1]])
+; CHECK: %[[MASKED_VEC2:.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %[[STRIDED2]], <4 x float> zeroinitializer
+; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[VEC_PHI1]], <4 x float> %[[MASKED_VEC2]])
+; CHECK: for.end
+; CHECK: ret void
+entry:
+  %arrayidxa = getelementptr inbounds float, float* %a, i64 1
+  %a1 = load float, float* %a, align 4
+  %a2 = load float, float* %arrayidxa, align 4
+  br label %for.body
+
+for.body:
+  %add.phi1 = phi float [ %a2, %entry ], [ %add2, %for.body ]
+  %add.phi2 = phi float [ %a1, %entry ], [ %add1, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidxb1 = getelementptr inbounds float, float* %b, i64 %iv
+  %0 = load float, float* %arrayidxb1, align 4
+  %add1 = fadd float %0, %add.phi2
+  %or = or i64 %iv, 1
+  %arrayidxb2 = getelementptr inbounds float, float* %b, i64 %or
+  %1 = load float, float* %arrayidxb2, align 4
+  %add2 = fadd float %1, %add.phi1
+  %iv.next = add nuw nsw i64 %iv, 2
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2
+
+for.end:
+  store float %add1, float* %a, align 4
+  store float %add2, float* %arrayidxa, align 4
+  ret void
+}
+
+; Check that we can vectorise conditional ordered reductions. Note that the
+; following loop contains a mixture of normal and reducing adds, which checks
+; that only the reducing add is matched as an ordered instruction.
+;
+; float func(float* restrict a, float* restrict b, int count) {
+;   float res = 0.0f;
+;   for (int i = 0; i < count; ++i)
+;     if (a[1] > 0.5f)
+;       res += a[i] + b[i];
+;   return res;
+; }
+
+define float @fadd_conditional_rdx(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @fadd_conditional_rdx
+; CHECK: vector.body
+; CHECK: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
+; CHECK: %[[LOAD1:.*]] = load <4 x float>, <4 x float>*
+; CHECK: %[[LOAD2:.*]] = load <4 x float>, <4 x float>*
+; CHECK: %[[ADD:.*]] = fadd <4 x float> %[[LOAD1]], %[[LOAD2]]
+; CHECK: %[[MASKED_VEC:.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %[[ADD]], <4 x float> zeroinitializer
+; CHECK: %[[RDX]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[VEC_PHI1]], <4 x float> %[[MASKED_VEC]])
+; CHECK: for.end.loopexit
+; CHECK: %[[EXIT_PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ]
+; CHECK: for.end
+; CHECK: %[[PHI:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[EXIT_PHI]], %for.end.loopexit ]
+; CHECK: ret float %[[PHI]]
+entry:
+  %arrayidx = getelementptr inbounds float, float* %a, i64 1
+  %0 = load float, float* %arrayidx, align 4
+  %cmp1 = fcmp ogt float %0, 5.000000e-01
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %res.014 = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv
+  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds float, float* %b, i64 %iv
+  %2 = load float, float* %arrayidx4, align 4
+  %add = fadd float %1, %2
+  %rdx = fadd float %res.014, %add
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !2
+
+for.end:                                          ; preds = %for.body, %entry
+  %res = phi float [ 0.000000e+00, %entry ], [ %rdx, %for.body ]
+  ret float %res
+}
+
+!0 = distinct !{!0, !3, !5, !7}
+!1 = distinct !{!1, !3, !6, !7}
+!2 = distinct !{!2, !4, !5, !7}
+!3 = !{!"llvm.loop.vectorize.width", i32 8}
+!4 = !{!"llvm.loop.vectorize.width", i32 4}
+!5 = !{!"llvm.loop.interleave.count", i32 1}
+!6 = !{!"llvm.loop.interleave.count", i32 2}
+!7 = !{!"llvm.loop.vectorize.enable", i1 true}
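
For context, here is a minimal hand-written sketch of the pattern checkOrderedReduction accepts and of the shape of vector body the new code path is expected to produce. It is not taken from the patch or its tests; the function and value names are illustrative only. The key point is that the accumulator stays scalar and each vector iteration is folded with an llvm.vector.reduce.fadd call that carries no 'reassoc' flag, so the same left-to-right association as the scalar loop is used.

; A strict FAdd recurrence (no fast-math flags). The reduction PHI %sum is an
; operand of the fadd that produces the loop-exit value, which is the only
; pattern checkOrderedReduction accepts.
define float @sum(float* %a, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %sum = phi float [ 0.000000e+00, %entry ], [ %sum.next, %loop ]
  %gep = getelementptr inbounds float, float* %a, i64 %iv
  %x = load float, float* %gep, align 4
  %sum.next = fadd float %sum, %x
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret float %sum.next
}

; Expected shape of the vector body for this loop (VF=4, UF=1): the accumulator
; remains a scalar PHI and each iteration performs an in-order fold.
;   %acc       = phi float [ 0.000000e+00, %vector.ph ], [ %acc.next, %vector.body ]
;   %wide.load = load <4 x float>, <4 x float>* %cast, align 4
;   %acc.next  = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %wide.load)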
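
The predicated form follows the same idea. A sketch of what createOrderedReduction emits for an assumed mask %mask (again illustrative, mirroring the zeroinitializer selects visible in the CHECK lines above): inactive lanes are replaced with the value returned by getRecurrenceIdentity for FAdd before the in-order fold, so only the active lanes contribute loaded data to the running sum.

;   %masked   = select <4 x i1> %mask, <4 x float> %wide.load, <4 x float> zeroinitializer
;   %acc.next = call float @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %masked)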