Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -234,7 +234,7 @@ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, - bool isPreheader); + bool isPreheader, bool isExit); bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, @@ -645,12 +645,17 @@ /// blocks so we can split them the way we want them. bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { SmallPtrSet Preheaders; + SmallVector ExitBlocks; + SmallPtrSet ExitBBSet; SmallVector LoopList(LI->begin(), LI->end()); while (!LoopList.empty()) { Loop *L = LoopList.pop_back_val(); LoopList.insert(LoopList.end(), L->begin(), L->end()); if (BasicBlock *Preheader = L->getLoopPreheader()) Preheaders.insert(Preheader); + L->getExitBlocks(ExitBlocks); + while(!ExitBlocks.empty()) + ExitBBSet.insert(ExitBlocks.pop_back_val()); } bool MadeChange = false; @@ -658,10 +663,12 @@ for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = &*I++; BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); - if (!DestBB || - !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) + if (!DestBB || !isMergingEmptyBlockProfitable( + BB, DestBB, Preheaders.count(BB), ExitBBSet.count(BB))) continue; + ExitBBSet.erase(BB); + Preheaders.erase(BB); eliminateMostlyEmptyBlock(BB); MadeChange = true; } @@ -670,7 +677,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, - bool isPreheader) { + bool isPreheader, + bool isExit) { // Do not delete loop preheaders if doing so would create a critical edge. // Loop preheaders can be good locations to spill registers. If the // preheader is deleted and we create a critical edge, registers may be @@ -694,6 +702,11 @@ isa(Pred->getTerminator()))) return true; + // If the destination block is almost empty exit block then this block + // was created by loopsimplify and we can merge it. + if (isExit && DestBB->getTerminator() == DestBB->getFirstNonPHI()) + return true; + if (BB->getTerminator() != BB->getFirstNonPHI()) return true; Index: test/Transforms/CodeGenPrepare/merge-empty-latch-block.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/merge-empty-latch-block.ll @@ -0,0 +1,117 @@ +; RUN: opt -codegenprepare < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; Expect to merge empty latch block as it will hoist the jump through backedge. +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@finput = external local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = external hidden unnamed_addr constant [23 x i8], align 1 +@lineno = external local_unnamed_addr global i32, align 4 +@.str.1 = external hidden unnamed_addr constant [21 x i8], align 1 + +; Function Attrs: nounwind +; CHECK-LABEL: @skip_white_space +define i32 @skip_white_space() local_unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %0 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call11 = tail call i32 @_IO_getc(%struct._IO_FILE* %0) + switch i32 %call11, label %sw.default [ + i32 47, label %sw.bb + i32 10, label %sw.bb25 + i32 32, label %for.cond.backedge + i32 9, label %for.cond.backedge + i32 12, label %for.cond.backedge + ] + +sw.bb: ; preds = %for.cond + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call1 = tail call i32 @_IO_getc(%struct._IO_FILE* %1) + %cmp = icmp eq i32 %call1, 42 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %sw.bb + tail call void @fatals(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i64 0, i64 0), i32 %call1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #3 + br label %if.end + +if.end: ; preds = %if.then, %sw.bb + %2 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call2 = tail call i32 @_IO_getc(%struct._IO_FILE* %2) + br label %while.body + +; CHECK-LABEL: while.body +; CHECK: %c.140 = phi i32 [ %call2, %if.end ], [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ %c.2, %while.cond5 ] +; CHECK-NOT: %c.140 = phi i32 [ %call2, %if.end ], [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ %c.2, %while.body.backedge.loopexit ] +while.body: ; preds = %while.body.backedge, %if.end + %c.140 = phi i32 [ %call2, %if.end ], [ %c.140.be, %while.body.backedge ] + switch i32 %c.140, label %if.else19 [ + i32 42, label %while.cond5.preheader + i32 10, label %if.then14 + i32 -1, label %if.then18 + ] + +while.cond5.preheader: ; preds = %while.body + br label %while.cond5 + +while.cond5: ; preds = %while.body7, %while.cond5.preheader + %c.2 = phi i32 [ %call8, %while.body7 ], [ 42, %while.cond5.preheader ] + switch i32 %c.2, label %while.body.backedge.loopexit [ + i32 42, label %while.body7 + i32 47, label %for.cond.backedge.loopexit + ] + +while.body7: ; preds = %while.cond5 + %3 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call8 = tail call i32 @_IO_getc(%struct._IO_FILE* %3) + br label %while.cond5 + +if.then14: ; preds = %while.body + %4 = load i32, i32* @lineno, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* @lineno, align 4 + %5 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call15 = tail call i32 @_IO_getc(%struct._IO_FILE* %5) + br label %while.body.backedge + +if.then18: ; preds = %while.body + tail call void @fatal(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0)) #3 + br label %while.body.backedge + +if.else19: ; preds = %while.body + %6 = load %struct._IO_FILE*, %struct._IO_FILE** @finput, align 8 + %call20 = tail call i32 @_IO_getc(%struct._IO_FILE* %6) + br label %while.body.backedge + +while.body.backedge.loopexit: ; preds = %while.cond5 + br label %while.body.backedge + +while.body.backedge: ; preds = %while.body.backedge.loopexit, %if.else19, %if.then18, %if.then14 + %c.140.be = phi i32 [ %call20, %if.else19 ], [ -1, %if.then18 ], [ %call15, %if.then14 ], [ %c.2, %while.body.backedge.loopexit ] + br label %while.body + +sw.bb25: ; preds = %for.cond + %7 = load i32, i32* @lineno, align 4 + %inc26 = add nsw i32 %7, 1 + store i32 %inc26, i32* @lineno, align 4 + br label %for.cond.backedge + +for.cond.backedge.loopexit: ; preds = %while.cond5 + br label %for.cond.backedge + +for.cond.backedge: ; preds = %for.cond.backedge.loopexit, %sw.bb25, %for.cond, %for.cond, %for.cond + br label %for.cond + +sw.default: ; preds = %for.cond + ret i32 %call11 +} +; Function Attrs: nounwind +declare i32 @_IO_getc(%struct._IO_FILE* nocapture) local_unnamed_addr #1 + +declare void @fatals(i8*, i32, i32, i32, i32, i32, i32, i32, i32) local_unnamed_addr #2 + +declare void @fatal(i8*) local_unnamed_addr #2