Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -883,6 +883,34 @@ return true; } +// Support floating-point reduction if we have "unsafe-fp-math" function +// attribute, or if all fp instructions involved have the reassoc flag. +static bool isFPReductionSupported(const Loop *OuterLoop, PHINode &PHI) { + Function *F = OuterLoop->getHeader()->getParent(); + Value *V = dyn_cast(followLCSSA(&PHI)); + // Simple data flow analysis that checks whether all instructions + // involved in the fp reduction allow reassociation. + std::function areAllInstsReassoc; + areAllInstsReassoc = [&areAllInstsReassoc](Value *V) { + if (isa(V) || isa(V) || isa(V)) + return true; + if (isa(V)) { + Instruction *I = dyn_cast(V); + if (!I->hasAllowReassoc()) + return false; + bool AllowReassoc = true; + for (unsigned i = 0; i < I->getNumOperands(); i++) + AllowReassoc &= areAllInstsReassoc(I->getOperand(i)); + return AllowReassoc; + } + return false; + }; + if (F->getFnAttribute("unsafe-fp-math").getValueAsString() != "true") + if (!areAllInstsReassoc(V)) + return false; + return true; +} + // We currently support LCSSA PHI nodes in the outer loop exit, if their // incoming values do not come from the outer loop latch or if the // outer loop latch has a single predecessor. In that case, the value will @@ -893,28 +921,26 @@ static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); for (PHINode &PHI : LoopNestExit->phis()) { - // FIXME: We currently are not able to detect floating point reductions - // and have to use floating point PHIs as a proxy to prevent - // interchanging in the presence of floating point reductions. 
- if (PHI.getType()->isFloatingPointTy()) + if (PHI.getType()->isFloatingPointTy() && + !isFPReductionSupported(OuterLoop, PHI)) return false; for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { - Instruction *IncomingI = dyn_cast(PHI.getIncomingValue(i)); - if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) - continue; - - // The incoming value is defined in the outer loop latch. Currently we - // only support that in case the outer loop latch has a single predecessor. - // This guarantees that the outer loop latch is executed if and only if - // the inner loop is executed (because tightlyNested() guarantees that the - // outer loop header only branches to the inner loop or the outer loop - // latch). - // FIXME: We could weaken this logic and allow multiple predecessors, - // if the values are produced outside the loop latch. We would need - // additional logic to update the PHI nodes in the exit block as - // well. - if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) - return false; + Instruction *IncomingI = dyn_cast(PHI.getIncomingValue(i)); + if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) + continue; + + // The incoming value is defined in the outer loop latch. Currently we + // only support that in case the outer loop latch has a single predecessor. + // This guarantees that the outer loop latch is executed if and only if + // the inner loop is executed (because tightlyNested() guarantees that the + // outer loop header only branches to the inner loop or the outer loop + // latch). + // FIXME: We could weaken this logic and allow multiple predecessors, + // if the values are produced outside the loop latch. We would need + // additional logic to update the PHI nodes in the exit block as + // well. 
+ if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) + return false; } } return true; Index: llvm/test/Transforms/LoopInterchange/lcssa.ll =================================================================== --- llvm/test/Transforms/LoopInterchange/lcssa.ll +++ llvm/test/Transforms/LoopInterchange/lcssa.ll @@ -135,39 +135,41 @@ ret void } -; FIXME: We currently do not support LCSSA phi nodes involving floating point -; types, as we fail to detect floating point reductions for now. -; REMARK: UnsupportedPHIOuter +; Loops with floating point reductions are interchanged with fastmath. +; REMARK: Interchanged ; REMARK-NEXT: lcssa_04 -define void @lcssa_04() { +define void @lcssa_04() #0 { entry: br label %outer.header outer.header: ; preds = %outer.inc, %entry %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] - %float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.outer.next, %outer.inc ] br label %for.body3 for.body3: ; preds = %for.body3, %outer.header %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %float.inner = phi float [ %float.inner.next, %for.body3 ], [ %float.outer, %outer.header ] %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer %vA = load i32, i32* %arrayidx5 %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer %vC = load i32, i32* %arrayidx9 %add = add nsw i32 %vA, %vC + %float.inner.next = fadd fast float %float.inner, 1.000000e+00 store i32 %add, i32* %arrayidx5 %iv.inner.next = add nuw nsw i64 %iv.inner, 1 %exitcond = icmp eq i64 %iv.inner.next, 100 br i1 %exitcond, label %outer.inc, label %for.body3 outer.inc: ; preds = %for.body3 + %float.outer.next = phi float [ %float.inner.next, %for.body3 ] %iv.outer.next = add nsw i64 %iv.outer, 1 %cmp = icmp eq i64 %iv.outer.next, 100 br 
i1 %cmp, label %outer.header, label %for.exit for.exit: ; preds = %outer.inc - %float.outer.lcssa = phi float [ %float.outer, %outer.inc ] + %float.outer.lcssa = phi float [ %float.outer.next, %outer.inc ] store float %float.outer.lcssa, float* @F br label %for.end16 @@ -175,6 +177,8 @@ ret void } +attributes #0 = {"unsafe-fp-math"="true" } + ; PHI node in inner latch with multiple predecessors. ; REMARK: Interchanged ; REMARK-NEXT: lcssa_05 Index: llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll =================================================================== --- llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -189,3 +189,162 @@ %il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ] ret i64 %il.res.lcssa2 } + +; Floating point reductions are interchanged with "unsafe-fp-math" +; function attribute. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: test4 + +define float @test4([100 x [100 x float]]* %Arr) #0 { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd fast float %float.inner, %vA + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + 
+outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} + +attributes #0 = {"unsafe-fp-math"="true" } + +; Floating point reductions are not interchanged without "unsafe-fp-math" +; function attribute, and the fp instructions involved do not allow +; reassociation. +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedExitPHI +; REMARKS-NEXT: Function: test5 + +define float @test5([100 x [100 x float]]* %Arr) { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd float %float.inner, %vA + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} + +; Floating 
point reductions are interchanged if the function does not have +; "unsafe-fp-math" attribute, but all the fp instructions involved allow +; reassociation. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: test6 + +define float @test6([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd reassoc float %float.inner, %vA + %arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer + %vB = load float, float* %arrayidx6 + %float.inner.inc.inc = fadd reassoc float %float.inner.inc, %vB + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} + +; Floating point reductions are not interchanged if the function does not have +; "unsafe-fp-math" attribute, and not all the fp instructions involved allow +; 
reassociation. +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedExitPHI +; REMARKS-NEXT: Function: test7 + +define float @test7([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) { +entry: + br label %outer.header + +outer.header: ; preds = %outer.inc, %entry + %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] + %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %outer.header + %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ] + %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] + %arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer + %vA = load float, float* %arrayidx5 + %float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation + %arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer + %vB = load float, float* %arrayidx6 + %float.inner.inc.inc = fadd reassoc float %float.inner.inc, %vB + %iv.inner.next = add nuw nsw i64 %iv.inner, 1 + %exitcond = icmp eq i64 %iv.inner.next, 100 + br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc: ; preds = %for.body3 + %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ] + %iv.outer.next = add nsw i64 %iv.outer, 1 + %cmp = icmp eq i64 %iv.outer.next, 100 + br i1 %cmp, label %outer.header, label %for.exit + +for.exit: ; preds = %outer.inc + %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ] + ret float %float.outer.lcssa +} \ No newline at end of file