diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -893,11 +893,14 @@ static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); for (PHINode &PHI : LoopNestExit->phis()) { - // FIXME: We currently are not able to detect floating point reductions - // and have to use floating point PHIs as a proxy to prevent - // interchanging in the presence of floating point reductions. - if (PHI.getType()->isFloatingPointTy()) - return false; + // Support floating-point reduction only if we allow unsafe fp math. + if (PHI.getType()->isFloatingPointTy()) { + Function *F = OuterLoop->getHeader()->getParent(); + Instruction *I = dyn_cast<Instruction>(followLCSSA(&PHI)); + if (!F->getFnAttribute("unsafe-fp-math").getValueAsBool() || + I == nullptr || !I->isFast()) + return false; + } for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll --- a/llvm/test/Transforms/LoopInterchange/lcssa.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll @@ -135,39 +135,41 @@ ret void } -; FIXME: We currently do not support LCSSA phi nodes involving floating point -; types, as we fail to detect floating point reductions for now. -; REMARK: UnsupportedPHIOuter +; Loops with floating point reductions are interchanged with fastmath. 
+; REMARK: Interchanged ; REMARK-NEXT: lcssa_04 -define void @lcssa_04() { +define void @lcssa_04() #0 { entry: br label %outer.header outer.header: ; preds = %outer.inc, %entry %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] - %float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.outer.next, %outer.inc ] br label %for.body3 for.body3: ; preds = %for.body3, %outer.header %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %float.inner = phi float [ %float.inner.next, %for.body3 ], [ %float.outer, %outer.header ] %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer %vA = load i32, i32* %arrayidx5 %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer %vC = load i32, i32* %arrayidx9 %add = add nsw i32 %vA, %vC + %float.inner.next = fadd fast float %float.inner, 1.000000e+00 store i32 %add, i32* %arrayidx5 %iv.inner.next = add nuw nsw i64 %iv.inner, 1 %exitcond = icmp eq i64 %iv.inner.next, 100 br i1 %exitcond, label %outer.inc, label %for.body3 outer.inc: ; preds = %for.body3 + %float.outer.next = phi float [ %float.inner.next, %for.body3 ] %iv.outer.next = add nsw i64 %iv.outer, 1 %cmp = icmp eq i64 %iv.outer.next, 100 br i1 %cmp, label %outer.header, label %for.exit for.exit: ; preds = %outer.inc - %float.outer.lcssa = phi float [ %float.outer, %outer.inc ] + %float.outer.lcssa = phi float [ %float.outer.next, %outer.inc ] store float %float.outer.lcssa, float* @F br label %for.end16 @@ -175,6 +177,8 @@ ret void } +attributes #0 = {"unsafe-fp-math"="true" } + ; PHI node in inner latch with multiple predecessors. 
; REMARK: Interchanged ; REMARK-NEXT: lcssa_05 diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll --- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -189,3 +189,77 @@ %il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ] ret i64 %il.res.lcssa2 } + +; Floating point reductions are interchanged with fast-math. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: test4 + +define float @test4([100 x [100 x float]]* %Arr) #0 { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd fast float %float.inner, %vA + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} + +attributes #0 = 
{"unsafe-fp-math"="true" } + +; Floating point reductions are not interchanged without fast-math. +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedExitPHI +; REMARKS-NEXT: Function: test5 + +define float @test5([100 x [100 x float]]* %Arr) { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd float %float.inner, %vA + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} \ No newline at end of file