Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -111,6 +112,10 @@ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare")); +static cl::opt<bool> DisablePreheaderProtect( + "disable-preheader-prot", cl::Hidden, cl::init(false), + cl::desc("Disable protection against removing loop preheaders")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -122,6 +127,7 @@ const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; + const LoopInfo *LI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -158,9 +164,10 @@ const char *getPassName() const override { return "CodeGen Prepare"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); + // FIXME: When we can selectively preserve passes, preserve the domtree. AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); } private: @@ -218,6 +225,7 @@ TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be @@ -359,6 +367,15 @@ /// edges in ways that are non-optimal for isel. Start by eliminating these /// blocks so we can split them the way we want them. 
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { + SmallPtrSet<BasicBlock *, 16> Preheaders; + SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); + while (!LoopList.empty()) { + Loop *L = LoopList.pop_back_val(); + LoopList.insert(LoopList.end(), L->begin(), L->end()); + if (BasicBlock *Preheader = L->getLoopPreheader()) + Preheaders.insert(Preheader); + } + bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { @@ -391,6 +408,14 @@ if (!canMergeBlocks(BB, DestBB)) continue; + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. + if (!DisablePreheaderProtect && Preheaders.count(BB) && + !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) + continue; + eliminateMostlyEmptyBlock(BB); MadeChange = true; } Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -29,7 +29,7 @@ ; Set the first argument to zero. ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: bl _doSomething -; +; ; Without shrink-wrapping, epilogue is in the exit block. ; DISABLE: [[EXIT_LABEL]]: ; Epilogue code. @@ -332,11 +332,11 @@ ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Sum is merged with the returned register. 
-; CHECK: mov [[SUM:w0]], wzr -; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16 +; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16 ; CHECK-NEXT: str [[VA_BASE]], [sp, #8] ; CHECK-NEXT: cmp w1, #1 ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]] +; CHECK: mov [[SUM:w0]], wzr ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8] @@ -347,18 +347,18 @@ ; CHECK-NEXT: sub w1, w1, #1 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] ; -; DISABLE-NEXT: b [[IFEND_LABEL]] +; DISABLE-NEXT: b ; DISABLE: [[ELSE_LABEL]]: ; %if.else ; DISABLE: lsl w0, w1, #1 ; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; ENABLE: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; ; CHECK: [[IFEND_LABEL]]: ; Epilogue code. ; CHECK: add sp, sp, #16 ; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE: lsl w0, w1, #1 -; ENABLE-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 { entry: %ap = alloca i8*, align 8 Index: test/CodeGen/ARM/code-placement.ll =================================================================== --- test/CodeGen/ARM/code-placement.ll +++ test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_1: -; CHECK: bne LBB0_1 -; CHECK-NOT: b LBB0_1 +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] Index: test/CodeGen/ARM/sjlj-prepare-critical-edge.ll =================================================================== --- test/CodeGen/ARM/sjlj-prepare-critical-edge.ll +++ test/CodeGen/ARM/sjlj-prepare-critical-edge.ll @@ -75,7 +75,7 @@ ; CHECK-LABEL: __Z4foo1c: ; CHECK: blx __Znwm -; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge +; CHECK: {{.*}}@ %do.body.i.i.i.preheader ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]] ; CHECK: {{.*}}@ %do.body.i.i.i ; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]] Index: 
test/CodeGen/Generic/dont-remove-empty-preheader.ll =================================================================== --- test/CodeGen/Generic/dont-remove-empty-preheader.ll +++ test/CodeGen/Generic/dont-remove-empty-preheader.ll @@ -0,0 +1,39 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s +; CHECK: for.body.preheader + +@N = common global i32 0, align 4 +@E = common global i8** null, align 8 +@B = common global i8** null, align 8 + +; Function Attrs: nounwind +define i32 @foo() { +entry: + %0 = load i32, i32* @N, align 4 + %1 = load i8**, i8*** @E, align 8 + %2 = load i8**, i8*** @B, align 8 + %cmp7 = icmp eq i8** %2, %1 + br i1 %cmp7, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %n.0.lcssa = phi i32 [ %0, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] + ret i32 %n.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %I.09 = phi i8** [ %incdec.ptr, %for.body ], [ %2, %for.body.preheader ] + %n.08 = phi i32 [ %add, %for.body ], [ %0, %for.body.preheader ] + %3 = load i8*, i8** %I.09, align 8 + %call = tail call i32 @map(i8* %3) + %add = add nsw i32 %call, %n.08 + %incdec.ptr = getelementptr inbounds i8*, i8** %I.09, i64 1 + %cmp = icmp eq i8** %incdec.ptr, %1 + br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body +} + +declare i32 @map(i8*) Index: test/CodeGen/Mips/brdelayslot.ll =================================================================== --- test/CodeGen/Mips/brdelayslot.ll +++ test/CodeGen/Mips/brdelayslot.ll @@ -5,19 +5,19 @@ ; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD ; RUN: llc -march=mipsel -disable-mips-df-backward-search \ -; RUN: -disable-mips-df-succbb-search=false < 
%s | \ +; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \ ; RUN: FileCheck %s -check-prefix=SUCCBB define void @foo1() nounwind { entry: -; Default: jalr -; Default-NOT: nop -; Default: jr +; Default: jalr +; Default-NOT: nop +; Default: jr ; Default-NOT: nop ; Default: .end -; None: jalr -; None: nop -; None: jr +; None: jalr +; None: nop +; None: jr ; None: nop ; None: .end Index: test/CodeGen/Mips/prevent-hoisting.ll =================================================================== --- test/CodeGen/Mips/prevent-hoisting.ll +++ test/CodeGen/Mips/prevent-hoisting.ll @@ -11,12 +11,12 @@ ; CHECK-LABEL: readLumaCoeff8x8_CABAC ; The check for first "addiu" instruction is added so that we can match the correct "b" instruction. -; CHECK: addiu ${{[0-9]+}}, $zero, -1 +; CHECK: andi ; CHECK: b $[[BB0:BB[0-9_]+]] -; CHECK-NEXT: addiu ${{[0-9]+}}, $zero, 0 +; CHECK-NEXT: sll ; Check that at the start of a fallthrough block there is a instruction that writes to $1. -; CHECK-NEXT: {{BB[0-9_#]+}}: +; CHECK-NEXT: {{BB[0-9_#]+}}: ; CHECK-NEXT: lw $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]]) ; CHECK-NEXT: sll $1, $[[R0:[0-9]+]], 4 Index: test/CodeGen/X86/2011-09-14-valcoalesce.ll =================================================================== --- test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -19,7 +19,7 @@ ; reusing the pre-addition register later, or the post-addition one. 
Currently, ; it does the latter, so we check: -; CHECK: # %while.body85.i +; CHECK: # %while.body85.i{{$}} ; CHECK-NOT: # % ; CHECK-NOT: add ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -604,10 +604,8 @@ ; ; CHECK: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry -; CHECK: [[BODY:# BB#[0-9]+]]: ; CHECK: %loop2b ; CHECK: %loop1 -; CHECK: %loop2a entry: br i1 undef, label %loop2a, label %body Index: test/CodeGen/X86/break-false-dep.ll =================================================================== --- test/CodeGen/X86/break-false-dep.ll +++ test/CodeGen/X86/break-false-dep.ll @@ -64,7 +64,7 @@ declare double @llvm.sqrt.f64(double) ; SSE-LABEL: loopdep1 -; SSE: for.body +; SSE: for.body{{$}} ; ; This loop contains two cvtsi2ss instructions that update the same xmm ; register. Verify that the execution dependency fix pass breaks those @@ -139,7 +139,7 @@ ; This loop contains a cvtsi2sd instruction that has a loop-carried ; false dependency on an xmm that is modified by other scalar instructions -; that follow it in the loop. Additionally, the source of convert is a +; that follow it in the loop. Additionally, the source of convert is a ; memory operand. Verify the execution dependency fix pass breaks this ; dependency by inserting a xor before the convert. 
@x = common global [1024 x double] zeroinitializer, align 16 Index: test/CodeGen/X86/lsr-static-addr.ll =================================================================== --- test/CodeGen/X86/lsr-static-addr.ll +++ test/CodeGen/X86/lsr-static-addr.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: incq %rax -; ATOM: xorl %eax, %eax ; ATOM: movsd .LCPI0_0(%rip), %xmm0 +; ATOM: xorl %eax, %eax ; ATOM: align ; ATOM-NEXT: BB0_2: ; ATOM-NEXT: movsd A(,%rax,8) Index: test/CodeGen/X86/phi-immediate-factoring.ll =================================================================== --- test/CodeGen/X86/phi-immediate-factoring.ll +++ test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3 ; PR1296 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" Index: test/CodeGen/X86/phys_subreg_coalesce-2.ll =================================================================== --- test/CodeGen/X86/phys_subreg_coalesce-2.ll +++ test/CodeGen/X86/phys_subreg_coalesce-2.ll @@ -13,7 +13,7 @@ ifthen: ; preds = %entry ret i32 0 -; CHECK: forbody +; CHECK: forbody{{$}} ; CHECK-NOT: mov forbody: ; preds = %forbody, %forcond.preheader %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; [#uses=3] Index: test/CodeGen/X86/pr2659.ll =================================================================== --- test/CodeGen/X86/pr2659.ll +++ test/CodeGen/X86/pr2659.ll @@ -21,7 +21,7 @@ ; CHECK: je ; There should be no moves required in the for loop body. 
-; CHECK: %forbody +; CHECK: %forbody{{$}} ; CHECK-NOT: mov ; CHECK: jbe Index: test/CodeGen/X86/setcc-lowering.ll =================================================================== --- test/CodeGen/X86/setcc-lowering.ll +++ test/CodeGen/X86/setcc-lowering.ll @@ -33,7 +33,7 @@ define void @pr26232(i64 %a) { ; KNL-32-LABEL: pr26232: -; KNL-32: # BB#0: # %for_test11.preheader +; KNL-32: # BB#0: # %for_loop599.preheader ; KNL-32-NEXT: pushl %esi ; KNL-32-NEXT: .Ltmp0: ; KNL-32-NEXT: .cfi_def_cfa_offset 8 Index: test/CodeGen/X86/sink-blockfreq.ll =================================================================== --- test/CodeGen/X86/sink-blockfreq.ll +++ test/CodeGen/X86/sink-blockfreq.ll @@ -1,5 +1,5 @@ -; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI -; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI ; Test that by changing BlockFrequencyInfo we change the order in which ; machine-sink looks for sucessor blocks. By not using BFI, both G and B Index: test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -102,7 +102,7 @@ ; CHECK-NEXT: %for.body3.us.i ; CHECK-NEXT: Inner Loop ; CHECK: testb -; CHECK: jne +; CHECK: je ; CHECK: jmp define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp { entry: