diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -91,8 +91,10 @@ "Loop has a non-empty preheader with instructions that cannot be moved"); STATISTIC(FusionNotBeneficial, "Fusion is not beneficial"); STATISTIC(NonIdenticalGuards, "Candidates have different guards"); -STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block"); -STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block"); +STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with " + "instructions that cannot be moved"); +STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with " + "instructions that cannot be moved"); STATISTIC(NotRotated, "Candidate is not rotated"); enum FusionDependenceAnalysisChoice { @@ -745,22 +747,24 @@ continue; } - // The following two checks look for empty blocks in FC0 and FC1. If - // any of these blocks are non-empty, we do not fuse. This is done - // because we currently do not have the safety checks to determine if - // it is safe to move the blocks past other blocks in the loop. Once - // these checks are added, these conditions can be relaxed. - if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) { - LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit " - "block. Not fusing.\n"); + if (FC0->GuardBranch && + !isSafeToMoveBefore(*FC0->ExitBlock, + *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT, + PDT, DI)) { + LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe " + "instructions in exit block. Not fusing.\n"); reportLoopFusion(*FC0, *FC1, NonEmptyExitBlock); continue; } - if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) { - LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard " - "block. Not fusing.\n"); + if (FC0->GuardBranch && FC1->GuardBranch && + !isSafeToMoveBefore( + *FC1->GuardBranch->getParent(), + *FC0->GuardBranch->getParent()->getTerminator(), DT, PDT, + DI)) { + LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe " + "instructions in guard block. Not fusing.\n"); reportLoopFusion(*FC0, *FC1, NonEmptyGuardBlock); continue; @@ -1074,38 +1078,6 @@ return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader); } - /// Check that the guard for \p FC *only* contains the cmp/branch for the - /// guard. - /// Once we are able to handle intervening code, any code in the guard block - /// for FC1 will need to be treated as intervening code and checked whether - /// it can safely move around the loops. - bool isEmptyGuardBlock(const FusionCandidate &FC) const { - assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch."); - if (auto *CmpInst = dyn_cast(FC.GuardBranch->getCondition())) { - auto *GuardBlock = FC.GuardBranch->getParent(); - // If the generation of the cmp value is in GuardBlock, then the size of - // the guard block should be 2 (cmp + branch). If the generation of the - // cmp value is in a different block, then the size of the guard block - // should only be 1. - if (CmpInst->getParent() == GuardBlock) - return GuardBlock->size() == 2; - else - return GuardBlock->size() == 1; - } - - return false; - } - - bool isEmptyPreheader(const FusionCandidate &FC) const { - assert(FC.Preheader && "Expecting a valid preheader"); - return FC.Preheader->size() == 1; - } - - bool isEmptyExitBlock(const FusionCandidate &FC) const { - assert(FC.ExitBlock && "Expecting a valid exit block"); - return FC.ExitBlock->size() == 1; - } - /// Simplify the condition of the latch branch of \p FC to true, when both of /// its successors are the same. void simplifyLatchBranch(const FusionCandidate &FC) const { @@ -1385,6 +1357,14 @@ BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock(); BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock(); + // Move instructions from the exit block of FC0 to the beginning of the exit + // block of FC1. + moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI); + + // Move instructions from the guard block of FC1 to the end of the guard + // block of FC0. + moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI); + assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent"); SmallVector TreeUpdates; diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll --- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll +++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll @@ -213,6 +213,93 @@ for.end: ret void } + +; CHECK: remark: diagnostics_missed.c:67:3: [unsafe_exitblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty exit block with instructions that cannot be moved +define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B, i64 %N) { +for.first.guard: + %cmp3 = icmp slt i64 0, %N + br i1 %cmp3, label %for.first.preheader, label %for.second.guard + +for.first.preheader: + br label %for.first, !dbg !83 + +for.first: + %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04 + store i32 0, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.04, 1 + %cmp = icmp slt i64 %inc, %N + br i1 %cmp, label %for.first, label %for.first.exit + +for.first.exit: + call void @bar() + br label %for.second.guard + +for.second.guard: + %cmp21 = icmp slt i64 0, %N + br i1 %cmp21, label %for.second.preheader, label %for.end + +for.second.preheader: + br label %for.second + +for.second: + %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ] + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02 + store i32 0, i32* %arrayidx4, align 4 + %inc6 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc6, %N + br i1 %cmp2, label %for.second, label %for.second.exit + +for.second.exit: + br label %for.end + +for.end: + ret void +} + +; CHECK: remark: diagnostics_missed.c:72:3: [unsafe_guardblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty guard block with instructions that cannot be moved +define void @unsafe_guardblock(i32* noalias %A, i32* noalias %B, i64 %N) { +for.first.guard: + %cmp3 = icmp slt i64 0, %N + br i1 %cmp3, label %for.first.preheader, label %for.second.guard + +for.first.preheader: + br label %for.first, !dbg !86 + +for.first: + %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04 + store i32 0, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.04, 1 + %cmp = icmp slt i64 %inc, %N + br i1 %cmp, label %for.first, label %for.first.exit + +for.first.exit: + br label %for.second.guard + +for.second.guard: + call void @bar() + %cmp21 = icmp slt i64 0, %N + br i1 %cmp21, label %for.second.preheader, label %for.end + +for.second.preheader: + br label %for.second + +for.second: + %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ] + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02 + store i32 0, i32* %arrayidx4, align 4 + %inc6 = add nsw i64 %j.02, 1 + %cmp2 = icmp slt i64 %inc6, %N + br i1 %cmp2, label %for.second, label %for.second.exit + +for.second.exit: + br label %for.end + +for.end: + ret void +} + declare void @bar() attributes #0 = { nounwind readnone speculatable willreturn } @@ -301,3 +388,9 @@ !78 = !{} !79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5) !80 = !DILocation(line: 62, column: 3, scope: !79) +!81 = distinct !DISubprogram(name: "unsafe_exitblock", scope: !3, file: !3, line: 65, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78) +!82 = distinct !DILexicalBlock(scope: !81, file: !3, line: 3, column: 5) +!83 = !DILocation(line: 67, column: 3, scope: !82) +!84 = distinct !DISubprogram(name: "unsafe_guardblock", scope: !3, file: !3, line: 70, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78) +!85 = distinct !DILexicalBlock(scope: !84, file: !3, line: 3, column: 5) +!86 = !DILocation(line: 72, column: 3, scope: !85) diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll --- a/llvm/test/Transforms/LoopFusion/guarded.ll +++ b/llvm/test/Transforms/LoopFusion/guarded.ll @@ -119,3 +119,116 @@ for.end: ret void } + +; Test that `%add` is moved in for.second.exit, and the two loops for.first +; and for.second are fused. + +; CHECK: void @moveinsts_exitblock +; CHECK-LABEL: for.first.guard: +; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end +; CHECK-LABEL: for.first.preheader: +; CHECK-NEXT: br label %for.first +; CHECK-LABEL: for.first: +; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit +; CHECK-LABEL: for.second.exit: +; CHECK-NEXT: %add = add nsw i32 %x, 1 +; CHECK-NEXT: br label %for.end +; CHECK-LABEL: for.end: +; CHECK-NEXT: ret void +define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +for.first.guard: + %cmp.guard = icmp slt i64 0, %N + br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard + +for.first.preheader: + br label %for.first + +for.first: + %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04 + store i32 0, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.04, 1 + %cmp = icmp slt i64 %inc, %N + br i1 %cmp, label %for.first, label %for.first.exit + +for.first.exit: + %add = add nsw i32 %x, 1 + br label %for.second.guard + +for.second.guard: + br i1 %cmp.guard, label %for.second.preheader, label %for.end + +for.second.preheader: + br label %for.second + +for.second: + %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ] + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02 + store i32 0, i32* %arrayidx4, align 4 + %inc6 = add nsw i64 %j.02, 1 + %cmp.j = icmp slt i64 %inc6, %N + br i1 %cmp.j, label %for.second, label %for.second.exit + +for.second.exit: + br label %for.end + +for.end: + ret void +} + +; Test that `%add` is moved in for.first.guard, and the two loops for.first +; and for.second are fused. + +; CHECK: void @moveinsts_guardblock +; CHECK-LABEL: for.first.guard: +; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N +; CHECK-NEXT: %add = add nsw i32 %x, 1 +; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end +; CHECK-LABEL: for.first.preheader: +; CHECK-NEXT: br label %for.first +; CHECK-LABEL: for.first: +; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit +; CHECK-LABEL: for.second.exit: +; CHECK-NEXT: br label %for.end +; CHECK-LABEL: for.end: +; CHECK-NEXT: ret void +define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +for.first.guard: + %cmp.guard = icmp slt i64 0, %N + br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard + +for.first.preheader: + br label %for.first + +for.first: + %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04 + store i32 0, i32* %arrayidx, align 4 + %inc = add nsw i64 %i.04, 1 + %cmp = icmp slt i64 %inc, %N + br i1 %cmp, label %for.first, label %for.first.exit + +for.first.exit: + br label %for.second.guard + +for.second.guard: + %add = add nsw i32 %x, 1 + br i1 %cmp.guard, label %for.second.preheader, label %for.end + +for.second.preheader: + br label %for.second + +for.second: + %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ] + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02 + store i32 0, i32* %arrayidx4, align 4 + %inc6 = add nsw i64 %j.02, 1 + %cmp.j = icmp slt i64 %inc6, %N + br i1 %cmp.j, label %for.second, label %for.second.exit + +for.second.exit: + br label %for.end + +for.end: + ret void +}