Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -229,6 +229,7 @@
 
   private:
     bool eliminateFallThrough(Function &F);
+    bool tryEarlyReturns(Function &F);
     bool eliminateMostlyEmptyBlocks(Function &F);
     BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
     bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -249,6 +250,7 @@
     bool optimizeSwitchInst(SwitchInst *CI);
     bool optimizeExtractElementInst(Instruction *Inst);
     bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+    bool dupRetFedByEmptyBlocks(BasicBlock *BB);
     bool placeDbgValues(Function &F);
     bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                       LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -326,6 +328,10 @@
     }
   }
 
+  // Before we eliminate empty blocks, try to convert blocks that merely branch
+  // to a code-free return block into return blocks themselves.
+  EverMadeChange |= tryEarlyReturns(F);
+
   // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= eliminateMostlyEmptyBlocks(F);
@@ -639,6 +645,15 @@
   return Changed;
 }
 
+/// Try to see if any blocks just branch to a return block that has no code.
+/// It may be beneficial to convert such blocks into return blocks.
+bool CodeGenPrepare::tryEarlyReturns(Function &F) {
+  bool MadeChange = false;
+  for (BasicBlock &BB : F)
+    MadeChange |= dupRetFedByEmptyBlocks(&BB);
+  return MadeChange;
+}
+
 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
 /// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -2607,6 +2622,67 @@
   return Changed;
 }
 
+/// Look for opportunities to duplicate a return instruction into a predecessor
+/// that ends in an unconditional branch to a return block containing no code
+/// of its own. Namely, duplicate the return into both of bb0/bb1 below:
+/// @code
+/// bb0:
+///   br label %return
+/// bb1:
+///   br label %return
+/// return:
+///   %retval = phi i32 [ %val1, %bb0 ], [ %val2, %bb1 ]
+///   ret i32 %retval
+/// @endcode
+///
+/// \returns true if the return was folded into \p BB.
+bool CodeGenPrepare::dupRetFedByEmptyBlocks(BasicBlock *BB) {
+  DEBUG(dbgs() << "Trying to duplicate the successor's return into: ");
+  DEBUG(BB->dump());
+
+  // FoldReturnIntoUncondBranch expects BB to end in an unconditional branch.
+  // getSingleSuccessor() alone is not a strong enough check: it also accepts
+  // a conditional branch or switch whose targets all happen to be the same
+  // block, so test the terminator explicitly.
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!BI || !BI->isUnconditional()) {
+    DEBUG(dbgs() << "Doesn't end in an unconditional branch: ");
+    DEBUG(dbgs() << BB->getName() << "\n");
+    return false;
+  }
+  BasicBlock *SingleSucc = BI->getSuccessor(0);
+
+  // If the successor does nothing but return, then the branch can be
+  // converted to a return.
+  ReturnInst *RetInst = dyn_cast<ReturnInst>(SingleSucc->getTerminator());
+  if (!RetInst) {
+    DEBUG(dbgs() << "Single successor doesn't return: ");
+    DEBUG(dbgs() << SingleSucc->getName() << "\n");
+    return false;
+  }
+
+  // The SingleSucc block must not have any code in it - just a return,
+  // otherwise we're skipping the execution of that code.
+  if (SingleSucc->getFirstNonPHIOrDbg() != RetInst) {
+    DEBUG(dbgs() << "Single successor has code that we can't just bypass.\n");
+    return false;
+  }
+
+  // A void return can always be duplicated. Otherwise the returned value has
+  // to be a PHI, in which case the fold returns the value incoming from BB.
+  Value *RetVal = RetInst->getReturnValue();
+  if (RetVal && !isa<PHINode>(RetVal))
+    return false;
+
+  DEBUG(dbgs() << "BB before:\n");
+  DEBUG(BB->dump());
+  (void)FoldReturnIntoUncondBranch(RetInst, SingleSucc, BB);
+  DEBUG(dbgs() << "BB after:\n");
+  DEBUG(BB->dump());
+  ModifiedDT = true;
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/MIR/Generic/early-ret-bb.mir
===================================================================
--- test/CodeGen/MIR/Generic/early-ret-bb.mir
+++ test/CodeGen/MIR/Generic/early-ret-bb.mir
@@ -0,0 +1,67 @@
+# RUN: llc -run-pass codegenprepare -o - %s | FileCheck %s
+
+--- |
+
+  define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
+  entry:
+    %cmp5 = icmp sgt i32 %lim, 0
+    br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    br label %for.body
+
+  for.cond.cleanup.loopexit:                        ; preds = %for.body
+    %.lcssa = phi i32 [ %0, %for.body ]
+    br label %for.cond.cleanup
+
+  for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+    %Ret.0.lcssa = phi i32 [ 0, %entry ], [ %.lcssa, %for.cond.cleanup.loopexit ]
+    ret i32 %Ret.0.lcssa
+
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %lim, %for.body.preheader ]
+    %Ret.06 = phi i32 [ %0, %for.body ], [ 0, %for.body.preheader ]
+    %0 = add i32 %a, %Ret.06
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+  }
+
+...
+---
+# CHECK: for.cond.cleanup.loopexit
+# CHECK-NEXT: ret i32 %0
+# CHECK: for.cond.cleanup
+# CHECK-NEXT: ret i32 0
+name:            shrinkwrapme
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+liveins:
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+constants:
+body:             |
+
+...
Index: test/CodeGen/PowerPC/early-exit-shrink-wrap.ll
===================================================================
--- test/CodeGen/PowerPC/early-exit-shrink-wrap.ll
+++ test/CodeGen/PowerPC/early-exit-shrink-wrap.ll
@@ -0,0 +1,35 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck %s
+define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
+entry:
+  %cmp5 = icmp sgt i32 %lim, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %Ret.0.lcssa = phi i32 [ 0, %entry ], [ %0, %for.cond.cleanup.loopexit ]
+  ret i32 %Ret.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %Ret.06 = phi i32 [ %0, %for.body ], [ 0, %for.body.preheader ]
+  %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"(i32 %a, i32 %Ret.06)
+  %inc = add nuw nsw i32 %i.07, 1
+  %exitcond = icmp eq i32 %inc, %lim
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+
+; CHECK-LABEL: shrinkwrapme
+; CHECK: cmpwi 4, 1
+; CHECK-NEXT: blt 0, [[RETB:.LBB[0-9_]+]]
+; CHECK: std 28
+; CHECK: mtctr
+; CHECK: bdnz
+; CHECK: ld 28
+; CHECK: [[RETB]]:
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: blr
+}