Index: llvm/lib/Analysis/IVDescriptors.cpp
===================================================================
--- llvm/lib/Analysis/IVDescriptors.cpp
+++ llvm/lib/Analysis/IVDescriptors.cpp
@@ -309,6 +309,10 @@
   // flags from all the reduction operations.
   FastMathFlags FMF = FastMathFlags::getFast();
 
+  // The first instruction in the use-def chain of the Phi node that requires
+  // exact floating point operations.
+  Instruction *ExactFPMathInst = nullptr;
+
   // A value in the reduction can be used:
   // - By the reduction:
   //   - Reduction operation:
@@ -352,6 +356,9 @@
     if (Cur != Start) {
       ReduxDesc =
           isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
+      ExactFPMathInst = ExactFPMathInst == nullptr
+                            ? ReduxDesc.getExactFPMathInst()
+                            : ExactFPMathInst;
       if (!ReduxDesc.isRecurrence())
         return false;
       // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -480,8 +487,8 @@
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
-  const bool IsOrdered = checkOrderedReduction(
-      Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
+  const bool IsOrdered =
+      checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
 
   if (Start != Phi) {
     // If the starting value is not the same as the phi node, we speculatively
@@ -538,9 +545,8 @@
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
-                          ReduxDesc.getExactFPMathInst(), RecurrenceType,
-                          IsSigned, IsOrdered, CastInsts,
+  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
+                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
                           MinWidthCastToRecurrenceType);
   RedDes = RD;
   return true;
Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -68,6 +68,10 @@
 // TODO: Check if we can use a sparse matrix here.
 using CharMatrix = std::vector<std::vector<char>>;
 
+/// ReductionList contains the reduction descriptors that correspond
+/// to each reduction PHI node, which will be used to determine whether
+/// it is safe to reorder floating-point instructions in interchange.
+using ReductionList = SmallDenseMap<Value *, RecurrenceDescriptor, 2>;
 } // end anonymous namespace
 
 // Maximum number of dependencies that can be handled in the dependency matrix.
@@ -349,6 +353,11 @@
 
   /// Set of inner loop induction PHIs
   SmallVector<PHINode *, 8> InnerLoopInductions;
+
+  /// Keep track of reduction descriptors to determine whether floating-point
+  /// PHIs are supported for interchange, i.e., whether the reduction
+  /// operations can be reordered.
+  ReductionList RedDesc;
 };
 
 /// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -723,7 +732,8 @@
 }
 
 // Check V's users to see if it is involved in a reduction in L.
-static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+static PHINode *findInnerReductionPhi(Loop *L, Value *V,
+                                      ReductionList &RedDesc) {
   // Reduction variables cannot be constants.
   if (isa<Constant>(V))
     return nullptr;
@@ -733,8 +743,10 @@
       if (PHI->getNumIncomingValues() == 1)
         continue;
       RecurrenceDescriptor RD;
-      if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+      if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
+        RedDesc[V] = RD;
         return PHI;
+      }
       return nullptr;
     }
   }
@@ -766,7 +778,7 @@
     // Check if we have a PHI node in the outer loop that has a reduction
     // result from the inner loop as an incoming value.
     Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
-    PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+    PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V, RedDesc);
     if (!InnerRedPhi ||
         !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
       LLVM_DEBUG(
@@ -890,31 +902,34 @@
 // will still be true after interchanging. If we have multiple predecessor,
 // that may not be the case, e.g. because the outer loop latch may be executed
 // if the inner loop is not executed.
-static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop,
+                                          ReductionList &RedDesc) {
   BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
   for (PHINode &PHI : LoopNestExit->phis()) {
-    // FIXME: We currently are not able to detect floating point reductions
-    //        and have to use floating point PHIs as a proxy to prevent
-    //        interchanging in the presence of floating point reductions.
-    if (PHI.getType()->isFloatingPointTy())
-      return false;
+    if (PHI.getType()->isFloatingPointTy()) {
+      // Interchange is only safe if this fp PHI is a recognized reduction
+      // whose operations may all be reordered; an fp PHI we have no
+      // descriptor for must conservatively block the interchange.
+      auto It = RedDesc.find(followLCSSA(&PHI));
+      if (It == RedDesc.end() || It->second.getExactFPMathInst() != nullptr)
+        return false;
+    }
     for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
-      Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
-      if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
-        continue;
-
-      // The incoming value is defined in the outer loop latch. Currently we
-      // only support that in case the outer loop latch has a single predecessor.
-      // This guarantees that the outer loop latch is executed if and only if
-      // the inner loop is executed (because tightlyNested() guarantees that the
-      // outer loop header only branches to the inner loop or the outer loop
-      // latch).
-      // FIXME: We could weaken this logic and allow multiple predecessors,
-      // if the values are produced outside the loop latch. We would need
-      // additional logic to update the PHI nodes in the exit block as
-      // well.
-      if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
-        return false;
+      Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
+      if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
+        continue;
+
+      // The incoming value is defined in the outer loop latch. Currently we
+      // only support that in case the outer loop latch has a single predecessor.
+      // This guarantees that the outer loop latch is executed if and only if
+      // the inner loop is executed (because tightlyNested() guarantees that the
+      // outer loop header only branches to the inner loop or the outer loop
+      // latch).
+      // FIXME: We could weaken this logic and allow multiple predecessors,
+      // if the values are produced outside the loop latch. We would need
+      // additional logic to update the PHI nodes in the exit block as
+      // well.
+      if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
+        return false;
     }
   }
   return true;
@@ -1038,7 +1053,7 @@
     return false;
   }
 
-  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
+  if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop, RedDesc)) {
     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
     ORE->emit([&]() {
       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
Index: llvm/test/Transforms/LoopInterchange/lcssa.ll
===================================================================
--- llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -135,9 +135,8 @@
   ret void
 }
 
-; FIXME: We currently do not support LCSSA phi nodes involving floating point
-; types, as we fail to detect floating point reductions for now.
-; REMARK: UnsupportedPHIOuter
+; Loops with floating point reductions are interchanged with fastmath.
+; REMARK: Interchanged
 ; REMARK-NEXT: lcssa_04
 
 define void @lcssa_04() {
@@ -146,28 +145,31 @@
 
 outer.header:                                     ; preds = %outer.inc, %entry
   %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
-  %float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ]
+  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.outer.next, %outer.inc ]
   br label %for.body3
 
 for.body3:                                        ; preds = %for.body3, %outer.header
   %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
+  %float.inner = phi float [ %float.inner.next, %for.body3 ], [ %float.outer, %outer.header ]
   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer
   %vA = load i32, i32* %arrayidx5
   %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer
   %vC = load i32, i32* %arrayidx9
   %add = add nsw i32 %vA, %vC
+  %float.inner.next = fadd fast float %float.inner, 1.000000e+00
   store i32 %add, i32* %arrayidx5
   %iv.inner.next = add nuw nsw i64 %iv.inner, 1
   %exitcond = icmp eq i64 %iv.inner.next, 100
   br i1 %exitcond, label %outer.inc, label %for.body3
 
 outer.inc:                                        ; preds = %for.body3
+  %float.outer.next = phi float [ %float.inner.next, %for.body3 ]
   %iv.outer.next = add nsw i64 %iv.outer, 1
   %cmp = icmp eq i64 %iv.outer.next, 100
   br i1 %cmp, label %outer.header, label %for.exit
 
 for.exit:                                         ; preds = %outer.inc
-  %float.outer.lcssa = phi float [ %float.outer, %outer.inc ]
+  %float.outer.lcssa = phi float [ %float.outer.next, %outer.inc ]
   store float %float.outer.lcssa, float* @F
   br label %for.end16
Index: llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
===================================================================
--- llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -189,3 +189,83 @@
   %il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
   ret i64 %il.res.lcssa2
 }
+
+; Floating point reductions are interchanged if all the fp instructions
+; involved allow reassociation.
+; REMARKS: --- !Passed
+; REMARKS-NEXT: Pass: loop-interchange
+; REMARKS-NEXT: Name: Interchanged
+; REMARKS-NEXT: Function: test4
+
+define float @test4([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.inc, %entry
+  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
+  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %outer.header
+  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
+  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
+  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
+  %vA = load float, float* %arrayidx5
+  %float.inner.inc = fadd fast float %float.inner, %vA
+  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
+  %vB = load float, float* %arrayidx6
+  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
+  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
+  %exitcond = icmp eq i64 %iv.inner.next, 100
+  br i1 %exitcond, label %outer.inc, label %for.body3
+
+outer.inc:                                        ; preds = %for.body3
+  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
+  %iv.outer.next = add nsw i64 %iv.outer, 1
+  %cmp = icmp eq i64 %iv.outer.next, 100
+  br i1 %cmp, label %outer.header, label %for.exit
+
+for.exit:                                         ; preds = %outer.inc
+  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
+  ret float %float.outer.lcssa
+}
+
+; Floating point reductions are not interchanged if not all the fp instructions
+; involved allow reassociation.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass: loop-interchange
+; REMARKS-NEXT: Name: UnsupportedExitPHI
+; REMARKS-NEXT: Function: test5
+
+define float @test5([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.inc, %entry
+  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
+  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %outer.header
+  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
+  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
+  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
+  %vA = load float, float* %arrayidx5
+  %float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
+  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
+  %vB = load float, float* %arrayidx6
+  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
+  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
+  %exitcond = icmp eq i64 %iv.inner.next, 100
+  br i1 %exitcond, label %outer.inc, label %for.body3
+
+outer.inc:                                        ; preds = %for.body3
+  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
+  %iv.outer.next = add nsw i64 %iv.outer, 1
+  %cmp = icmp eq i64 %iv.outer.next, 100
+  br i1 %cmp, label %outer.header, label %for.exit
+
+for.exit:                                         ; preds = %outer.inc
+  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
+  ret float %float.outer.lcssa
+}