Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -315,6 +316,9 @@ /// Edges that have already been computed as optimal. DenseMap ComputedEdges; + /// Branch Factor. Used to decide between Equal blocks. + DenseMap BranchFactorMap; + /// \brief Machine Function MachineFunction *F; @@ -476,6 +480,13 @@ /// Find chains of triangles to tail-duplicate where a global analysis works, /// but a local analysis would not find them. void precomputeTriangleChains(); + /// Compute the branch factor of a block. Branch factor of a block with + /// multiple predecessors is 0, Otherwise the Branch factor of a block B with + /// n successors is sum(BF(successors(B))) + n - 1. Values are computed as + /// needed and placed in BranchFactorMap. Choosing a block with a higher + /// branch factor helps to make the dynamic taken branch count more + /// consistent, without raising the average. + uint32_t computeBranchFactor(const MachineBasicBlock *BB); public: static char ID; // Pass identification, replacement for typeid @@ -1199,6 +1210,47 @@ } } +uint32_t MachineBlockPlacement::computeBranchFactor(const MachineBasicBlock *BB) { + + auto FindIt = BranchFactorMap.find(BB); + if (FindIt != BranchFactorMap.end()) + return FindIt->second; + + if (BB->pred_size() > 1 || BB->succ_size() == 0) { + BranchFactorMap[BB] = 1; + return 1; + } + + typedef GraphTraits GT; + typedef typename GT::ChildIteratorType ChildItTy; + SmallVector, 16> VisitStack; + + VisitStack.emplace_back(BB, GT::child_begin(BB)); + + // Walk the nodes from BB in post order. This won't get confused by loops + // because we don't traverse nodes with more than one successor. + while(!VisitStack.empty()) { + // Keep pushing things on the stack until we have a Node with all children + // visited. + while(VisitStack.back().second != GT::child_end(VisitStack.back().first)) { + MachineBasicBlock *NextChild = *VisitStack.back().second++; + if (!BranchFactorMap.count(NextChild)) { + if (NextChild->pred_size() == 1 && NextChild->succ_size() != 0) + VisitStack.emplace_back(NextChild, GT::child_begin(NextChild)); + else + BranchFactorMap[NextChild] = 1; + } + } + auto TopBB = VisitStack.back().first; + VisitStack.pop_back(); + int SumBranches = 0; + for (MachineBasicBlock *Succ : TopBB->successors()) + SumBranches += BranchFactorMap[Succ]; + BranchFactorMap[TopBB] = SumBranches; + } + return BranchFactorMap[BB]; +} + // When profile is not present, return the StaticLikelyProb. // When profile is available, we need to handle the triangle-shape CFG. static BranchProbability getLayoutSuccessorProbThreshold( @@ -1450,6 +1502,10 @@ // calculations the minimum number of times. SmallVector, 4> DupCandidates; + // If we didn't pick a tail-duplicate candidate, choose between the blocks + // within 10% of BestProb by + SmallVector, 4> + Candidates; for (MachineBasicBlock *Succ : Successors) { auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BranchProbability SuccProb = @@ -1462,7 +1518,7 @@ Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. if (TailDupPlacement && shouldTailDuplicate(Succ)) - DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); + DupCandidates.emplace_back(SuccProb, Succ); continue; } @@ -1472,14 +1528,15 @@ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc.BB && BestProb >= SuccProb) { + Candidates.emplace_back(0, SuccProb, Succ); + if (BestProb >= SuccProb) { DEBUG(dbgs() << " Not the best candidate, continuing\n"); continue; } DEBUG(dbgs() << " Setting it as best candidate\n"); - BestSucc.BB = Succ; BestProb = SuccProb; + BestSucc.BB = Succ; } // Handle the tail duplication candidates in order of decreasing probability. // Stop at the first one that is profitable. Also stop if they are less @@ -1503,18 +1560,60 @@ if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { DEBUG( - dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " + dbgs() << " Selected: " << getBlockName(Succ) << ", probability: " << DupProb << " (Tail Duplicate)\n"); BestSucc.BB = Succ; BestSucc.ShouldTailDup = true; - break; + return BestSucc; } } - if (BestSucc.BB) - DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n"); + // If we didn't find any suitable successor, return now. + if (!BestSucc.BB) + return BestSucc; + + // The code from here to the end chooses blocks based on branch factor if they + // are close enough to the max probability. First we filter the vector down to + // the tuples that are close enough to the max probability. Then if there are + // more than one, we calculate the branch factor for those blocks. Finally we + // choose based on (branch factor, probability). We preserve order for + // determinism. + BranchProbability ScaledBestProb = BranchProbability(9, 10) * BestProb; + // If we didn't pick a tail-duplicate candidate, choose between the blocks + // within 10% of BestProb by + auto NotProbableEnough = [&ScaledBestProb] ( + std::tuple Candidate) { + return std::get<1>(Candidate) < ScaledBestProb; + }; + auto ValidCandidates = make_range( + Candidates.begin(), std::remove_if( + Candidates.begin(), Candidates.end(), NotProbableEnough)); + // If we only have one remaining candidate, use that. + if (std::next(ValidCandidates.begin()) == ValidCandidates.end()) { + DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n"); + BestSucc.BB = std::get<2>(*ValidCandidates.begin()); + return BestSucc; + } + for (auto &Candidate : ValidCandidates) { + auto Succ = std::get<2>(Candidate); + std::get<0>(Candidate) = computeBranchFactor(Succ); + } + auto cmp = + [](const std::tuple &a, + const std::tuple &b) { + return (std::get<0>(a) < std::get<0>(b) || + (std::get<0>(a) == std::get<0>(b) && + std::get<1>(a) < std::get<1>(b))); + }; + BranchProbability Prob; + uint32_t BranchFactor; + std::tie(BranchFactor, Prob, BestSucc.BB) = *std::max_element( + ValidCandidates.begin(), ValidCandidates.end(), cmp); + DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) + << ", probability: " << Prob + << ", branch factor: " << BranchFactor << "\n"); return BestSucc; } @@ -2713,6 +2812,7 @@ // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); ComputedEdges.clear(); + BranchFactorMap.clear(); // Must redo the post-dominator tree if blocks were changed. if (MPDT) MPDT->runOnMachineFunction(MF); @@ -2726,6 +2826,7 @@ BlockToChain.clear(); ComputedEdges.clear(); + BranchFactorMap.clear(); ChainAllocator.DestroyAll(); if (AlignAllBlock) Index: test/CodeGen/AArch64/combine-comparisons-by-cse.ll =================================================================== --- test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -47,10 +47,10 @@ define i32 @combine_gt_lt_5() #0 { ; CHECK-LABEL: combine_gt_lt_5 ; CHECK: cmp -; CHECK: b.le -; CHECK: ret +; CHECK: b.gt ; CHECK-NOT: cmp ; CHECK: b.ge +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 5 @@ -121,10 +121,10 @@ define i32 @combine_lt_gt_5() #0 { ; CHECK-LABEL: combine_lt_gt_5 ; CHECK: cmp -; CHECK: b.ge -; CHECK: ret +; CHECK: b.lt ; CHECK-NOT: cmp ; CHECK: b.le +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -158,10 +158,10 @@ define i32 @combine_gt_lt_n5() #0 { ; CHECK-LABEL: combine_gt_lt_n5 ; CHECK: cmn -; CHECK: b.le -; CHECK: ret +; CHECK: b.gt ; CHECK-NOT: cmn ; CHECK: b.ge +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, -5 @@ -195,10 +195,10 @@ define i32 @combine_lt_gt_n5() #0 { ; CHECK-LABEL: combine_lt_gt_n5 ; CHECK: cmn -; CHECK: b.ge -; CHECK: ret +; CHECK: b.lt ; CHECK-NOT: cmn ; CHECK: b.le +; CHECK: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, -5 Index: test/CodeGen/AArch64/machine_cse.ll =================================================================== --- test/CodeGen/AArch64/machine_cse.ll +++ test/CodeGen/AArch64/machine_cse.ll @@ -13,7 +13,7 @@ define void @combine-sign-comparisons-by-cse(i32 *%arg) { ; CHECK: cmp -; CHECK: b.ge +; CHECK: b.lt ; CHECK-NOT: cmp ; CHECK: b.le Index: test/CodeGen/ARM/2013-05-05-IfConvertBug.ll =================================================================== --- test/CodeGen/ARM/2013-05-05-IfConvertBug.ll +++ test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -126,10 +126,7 @@ ; CHECK-V8-LABEL: wrapDistance: ; CHECK-V8: cmp r1, #59 -; CHECK-V8-NEXT: bgt -; CHECK-V8-NEXT: %if.then -; CHECK-V8-NEXT: subs r0, r2, #1 -; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: ble ; CHECK-V8-NEXT: %if.else ; CHECK-V8-NEXT: subs [[REG:r[0-9]+]], #120 ; CHECK-V8-NEXT: cmp [[REG]], r1 @@ -140,6 +137,9 @@ ; CHECK-V8-NEXT: %if.then4 ; CHECK-V8-NEXT: adds r0, r1, #1 ; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: %if.then +; CHECK-V8-NEXT: subs r0, r2, #1 +; CHECK-V8-NEXT: bx lr ; CHECK-V8-NEXT: %if.end5 ; CHECK-V8-NEXT: subs r0, r1, r0 ; CHECK-V8-NEXT: bx lr Index: test/CodeGen/ARM/tail-opts.ll =================================================================== --- test/CodeGen/ARM/tail-opts.ll +++ test/CodeGen/ARM/tail-opts.ll @@ -15,16 +15,17 @@ ; redundant branching. ; CHECK-LABEL: tail_duplicate_me: -; CHECK: qux +; CHECK: car ; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK ; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r ; CHECK-NEXT: bx r -; CHECK: qux +; CHECK: bar ; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK ; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r ; CHECK-NEXT: bx r +; CHECK: dar ; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK ; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK ; CHECK: str r Index: test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll +++ test/CodeGen/PowerPC/tail-dup-branch-to-fallthrough.ll @@ -16,14 +16,14 @@ ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} ; CHECK: # %entry ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %sw.0 -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %sw.1 -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} ; CHECK: # %sw.default ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} ; CHECK: # %if.then ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} +; CHECK: # %sw.1 +; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} +; CHECK: # %sw.0 +; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} ; CHECK: # %if.else ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} ; CHECK: .Lfunc_end0 Index: test/CodeGen/WebAssembly/cfg-stackify.ll =================================================================== --- test/CodeGen/WebAssembly/cfg-stackify.ll +++ test/CodeGen/WebAssembly/cfg-stackify.ll @@ -536,17 +536,16 @@ ; OPT: block {{$}} ; OPT-NEXT: block {{$}} ; OPT: br_if 0, $pop{{[0-9]+}}{{$}} -; OPT: br_if 1, $pop{{[0-9]+}}{{$}} -; OPT: br 1{{$}} -; OPT-NEXT: .LBB13_3: -; OPT-NEXT: end_block{{$}} -; OPT-NEXT: block {{$}} +; OPT: block {{$}} ; OPT: br_if 0, $pop{{[0-9]+}}{{$}} -; OPT: br_if 1, $pop{{[0-9]+}}{{$}} -; OPT-NEXT: .LBB13_5: +; OPT: br_if 2, $pop{{[0-9]+}}{{$}} +; OPT-NEXT: .LBB13_3: ; OPT-NEXT: end_block{{$}} ; OPT-NEXT: return{{$}} -; OPT-NEXT: .LBB13_6: +; OPT-NEXT: .LBB13_4: +; OPT-NEXT: end_block{{$}} +; OPT: br_if 0, $pop{{[0-9]+}}{{$}} +; OPT: .LBB13_6: ; OPT-NEXT: end_block{{$}} ; OPT-NEXT: return{{$}} define void @test4(i32 %t) { @@ -1022,12 +1021,12 @@ ; OPT-NOT: block ; OPT: br_if 0, $pop{{[0-9]+}}{{$}} ; OPT-NOT: block +; OPT: br_if 1, $pop{{[0-9]+}}{{$}} +; OPT-NOT: block ; OPT: return{{$}} -; OPT-NEXT: .LBB20_6: +; OPT-NEXT: .LBB20_7: ; OPT-NEXT: end_block{{$}} ; OPT-NOT: block -; OPT: br_if 0, $pop{{[0-9]+}}{{$}} -; OPT-NOT: block ; OPT: return{{$}} ; OPT-NEXT: .LBB20_8: ; OPT-NEXT: end_block{{$}} Index: test/CodeGen/X86/block-placement-branch-factor.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/block-placement-branch-factor.mir @@ -0,0 +1,212 @@ +# RUN: llc -mtriple=x86_64-linux -run-pass=block-placement -o - %s | FileCheck %s +# +# Check that all but possibly the last if.else blocks get laid out first, before +# any of the if.then blocks, due to the branch factor checking. +# CHECK: bb.0.entry +# CHECK: bb.2.if.else +# CHECK: bb.4.if.else7 +# CHECK: bb.6.if.else17 +# One of these 2 blocks should follow else17 for fallthrough. As far as branch +# factor is concerned, they are equals, so it doesn't matter. +# CHECK: bb.{{[0-9]+}}.if.{{(then23|else27)}} +# CHECK-DAG: bb.{{[0-9]+}}.if.else27 +# CHECK-DAG: bb.{{[0-9]+}}.if.then +# CHECK-DAG: bb.{{[0-9]+}}.if.then4 +# CHECK-DAG: bb.{{[0-9]+}}.if.then13 +# The other block should be placed somewhere. +# CHECK-DAG: bb.{{[0-9]+}}.if.{{(else27|then23)}} +--- | + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define i8* @varint_encode(i8* %sptr, i32 %v) local_unnamed_addr { + entry: + %cmp = icmp ult i32 %v, 128 + %conv = trunc i32 %v to i8 + br i1 %cmp, label %if.then, label %if.else + + if.then: ; preds = %entry + %incdec.ptr = getelementptr inbounds i8, i8* %sptr, i64 1 + store i8 %conv, i8* %sptr, align 1, !tbaa !1 + br label %if.end37 + + if.else: ; preds = %entry + %conv1 = or i8 %conv, -128 + store i8 %conv1, i8* %sptr, align 1, !tbaa !1 + %cmp3 = icmp ult i32 %v, 16384 + %shr = lshr i32 %v, 7 + %conv5 = trunc i32 %shr to i8 + br i1 %cmp3, label %if.then4, label %if.else7 + + if.then4: ; preds = %if.else + %incdec.ptr6 = getelementptr inbounds i8, i8* %sptr, i64 2 + %sunkaddr = getelementptr i8, i8* %sptr, i64 1 + store i8 %conv5, i8* %sunkaddr, align 1, !tbaa !1 + br label %if.end37 + + if.else7: ; preds = %if.else + %conv10 = or i8 %conv5, -128 + %sunkaddr1 = getelementptr i8, i8* %sptr, i64 1 + store i8 %conv10, i8* %sunkaddr1, align 1, !tbaa !1 + %cmp12 = icmp ult i32 %v, 2097152 + %shr14 = lshr i32 %v, 14 + %conv15 = trunc i32 %shr14 to i8 + br i1 %cmp12, label %if.then13, label %if.else17 + + if.then13: ; preds = %if.else7 + %incdec.ptr16 = getelementptr inbounds i8, i8* %sptr, i64 3 + %sunkaddr2 = getelementptr i8, i8* %sptr, i64 2 + store i8 %conv15, i8* %sunkaddr2, align 1, !tbaa !1 + br label %if.end37 + + if.else17: ; preds = %if.else7 + %conv20 = or i8 %conv15, -128 + %sunkaddr3 = getelementptr i8, i8* %sptr, i64 2 + store i8 %conv20, i8* %sunkaddr3, align 1, !tbaa !1 + %cmp22 = icmp ult i32 %v, 268435456 + %shr24 = lshr i32 %v, 21 + %conv25 = trunc i32 %shr24 to i8 + br i1 %cmp22, label %if.then23, label %if.else27 + + if.then23: ; preds = %if.else17 + %incdec.ptr26 = getelementptr inbounds i8, i8* %sptr, i64 4 + %sunkaddr4 = getelementptr i8, i8* %sptr, i64 3 + store i8 %conv25, i8* %sunkaddr4, align 1, !tbaa !1 + br label %if.end37 + + if.else27: ; preds = %if.else17 + %conv30 = or i8 %conv25, -128 + %incdec.ptr31 = getelementptr inbounds i8, i8* %sptr, i64 4 + %sunkaddr5 = getelementptr i8, i8* %sptr, i64 3 + store i8 %conv30, i8* %sunkaddr5, align 1, !tbaa !1 + %shr32 = lshr i32 %v, 28 + %conv33 = trunc i32 %shr32 to i8 + %incdec.ptr34 = getelementptr inbounds i8, i8* %sptr, i64 5 + store i8 %conv33, i8* %incdec.ptr31, align 1, !tbaa !1 + br label %if.end37 + + if.end37: ; preds = %if.else27, %if.then23, %if.then13, %if.then4, %if.then + %ptr.0 = phi i8* [ %incdec.ptr, %if.then ], [ %incdec.ptr6, %if.then4 ], [ %incdec.ptr16, %if.then13 ], [ %incdec.ptr26, %if.then23 ], [ %incdec.ptr34, %if.else27 ] + ret i8* %ptr.0 + } + + !llvm.ident = !{!0} + + !0 = !{!"clang version 5.0.0 (trunk 303077) (llvm/trunk 303082)"} + !1 = !{!2, !2, i64 0} + !2 = !{!"omnipotent char", !3, i64 0} + !3 = !{!"Simple C++ TBAA"} + +... +--- +name: varint_encode +alignment: 4 +exposesReturnsTwice: false +noVRegs: true +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%rdi' } + - { reg: '%esi' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.1.if.then(0x40000000), %bb.2.if.else(0x40000000) + liveins: %esi, %rdi + + CMP32ri8 %esi, 127, implicit-def %eflags + JA_1 %bb.2.if.else, implicit killed %eflags + + bb.1.if.then: + liveins: %esi, %rdi + + MOV8mr %rdi, 1, _, 0, _, %sil, implicit killed %esi :: (store 1 into %ir.sptr, !tbaa !1) + %rdi = INC64r killed %rdi, implicit-def dead %eflags + %rax = MOV64rr killed %rdi + RETQ %rax + + bb.2.if.else: + successors: %bb.3.if.then4(0x40000000), %bb.4.if.else7(0x40000000) + liveins: %esi, %rdi + + %al = MOV8rr %sil + %al = OR8ri killed %al, -128, implicit-def dead %eflags + MOV8mr %rdi, 1, _, 0, _, killed %al :: (store 1 into %ir.sptr, !tbaa !1) + %eax = MOV32rr %esi + %eax = SHR32ri killed %eax, 7, implicit-def dead %eflags + CMP32ri %esi, 16383, implicit-def %eflags + JA_1 %bb.4.if.else7, implicit killed %eflags + + bb.3.if.then4: + liveins: %eax, %rdi + + MOV8mr %rdi, 1, _, 1, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr, !tbaa !1) + %rdi = ADD64ri8 killed %rdi, 2, implicit-def dead %eflags + %rax = MOV64rr killed %rdi + RETQ %rax + + bb.4.if.else7: + successors: %bb.5.if.then13(0x40000000), %bb.6.if.else17(0x40000000) + liveins: %eax, %esi, %rdi + + %al = OR8ri %al, -128, implicit-def dead %eflags, implicit killed %eax, implicit-def %eax + MOV8mr %rdi, 1, _, 1, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr1, !tbaa !1) + %eax = MOV32rr %esi + %eax = SHR32ri killed %eax, 14, implicit-def dead %eflags + CMP32ri %esi, 2097151, implicit-def %eflags + JA_1 %bb.6.if.else17, implicit killed %eflags + + bb.5.if.then13: + liveins: %eax, %rdi + + MOV8mr %rdi, 1, _, 2, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr2, !tbaa !1) + %rdi = ADD64ri8 killed %rdi, 3, implicit-def dead %eflags + %rax = MOV64rr killed %rdi + RETQ %rax + + bb.6.if.else17: + successors: %bb.7.if.then23(0x40000000), %bb.8.if.else27(0x40000000) + liveins: %eax, %esi, %rdi + + %al = OR8ri %al, -128, implicit-def dead %eflags, implicit killed %eax, implicit-def %eax + MOV8mr %rdi, 1, _, 2, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr3, !tbaa !1) + %eax = MOV32rr %esi + %eax = SHR32ri killed %eax, 21, implicit-def dead %eflags + CMP32ri %esi, 268435455, implicit-def %eflags + JA_1 %bb.8.if.else27, implicit killed %eflags + + bb.7.if.then23: + liveins: %eax, %rdi + + MOV8mr %rdi, 1, _, 3, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr4, !tbaa !1) + %rdi = ADD64ri8 killed %rdi, 4, implicit-def dead %eflags + %rax = MOV64rr killed %rdi + RETQ %rax + + bb.8.if.else27: + liveins: %eax, %esi, %rdi + + %al = OR8ri %al, -128, implicit-def dead %eflags, implicit killed %eax, implicit-def %eax + MOV8mr %rdi, 1, _, 3, _, %al, implicit killed %eax :: (store 1 into %ir.sunkaddr5, !tbaa !1) + %esi = SHR32ri killed %esi, 28, implicit-def dead %eflags + MOV8mr %rdi, 1, _, 4, _, %sil, implicit killed %esi :: (store 1 into %ir.incdec.ptr31, !tbaa !1) + %rdi = ADD64ri8 killed %rdi, 5, implicit-def dead %eflags + %rax = MOV64rr killed %rdi + RETQ %rax + +... Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -699,10 +699,10 @@ ; ; CHECK-LABEL: unanalyzable_branch_to_free_block ; CHECK: %entry -; CHECK: %a ; CHECK: %b -; CHECK: %c ; CHECK: %exit +; CHECK: %a +; CHECK: %c entry: br i1 undef, label %a, label %b Index: test/CodeGen/X86/loop-blocks.ll =================================================================== --- test/CodeGen/X86/loop-blocks.ll +++ test/CodeGen/X86/loop-blocks.ll @@ -140,19 +140,19 @@ ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: callq loop_header ; CHECK: jl .LBB3_7 -; CHECK: jge .LBB3_3 +; CHECK: jl .LBB3_8 +; CHECK: jl .LBB3_9 +; CHECK: jl .LBB3_6 +; CHECK-NEXT: callq loop_latch +; CHECK-NEXT: jmp .LBB3_1 +; CHECK: .LBB3_8: ; CHECK-NEXT: callq bar101 ; CHECK-NEXT: jmp .LBB3_1 -; CHECK-NEXT: align -; CHECK-NEXT: .LBB3_3: -; CHECK: jge .LBB3_4 +; CHECK-NEXT: .LBB3_9: ; CHECK-NEXT: callq bar102 ; CHECK-NEXT: jmp .LBB3_1 -; CHECK-NEXT: .LBB3_4: -; CHECK: jl .LBB3_6 -; CHECK-NEXT: callq loop_latch -; CHECK-NEXT: jmp .LBB3_1 ; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: callq exit define void @cfg_islands() nounwind { entry: @@ -228,39 +228,6 @@ ret void } -; This is exactly the same function as slightly_more_involved. -; The difference is that when optimising for size, we do not want -; to see this reordering. - -; CHECK-LABEL: slightly_more_involved_2: -; CHECK-NOT: jmp .LBB5_1 -; CHECK: .LBB5_1: -; CHECK-NEXT: callq body - -define void @slightly_more_involved_2() #0 { -entry: - br label %loop - -loop: - call void @body() - %t0 = call i32 @get() - %t1 = icmp slt i32 %t0, 2 - br i1 %t1, label %block_a, label %bb - -bb: - %t2 = call i32 @get() - %t3 = icmp slt i32 %t2, 99 - br i1 %t3, label %exit, label %loop - -block_a: - call void @bar99() - br label %loop - -exit: - call void @exit() - ret void -} - attributes #0 = { minsize norecurse nounwind optsize readnone uwtable } declare void @bar99() nounwind Index: test/CodeGen/X86/tail-merge-after-mbp.mir =================================================================== --- test/CodeGen/X86/tail-merge-after-mbp.mir +++ test/CodeGen/X86/tail-merge-after-mbp.mir @@ -3,29 +3,29 @@ --- # check loop bb.7 is not merged with bb.10, bb.13 # check loop bb.9 is not merged with bb.12 -# CHECK: bb.2: -# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.3(0x50000000) +# CHECK: bb.1: +# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.2(0x50000000) # CHECK: %rax = MOV64rm %r14, 1, _, 0, _ # CHECK-NEXT: TEST64rr %rax, %rax # CHECK-NEXT: JE_1 %bb.9 -# CHECK: bb.3: -# CHECK-NEXT: successors: %bb.4(0x30000000), %bb.8(0x50000000) +# CHECK: bb.2: +# CHECK-NEXT: successors: %bb.3(0x30000000), %bb.8(0x50000000) # CHECK: CMP64mi8 killed %rax, 1, _, 8, _, 0 # CHECK-NEXT: JNE_1 %bb.8 -# CHECK: bb.4: -# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.5(0x50000000) +# CHECK: bb.3: +# CHECK-NEXT: successors: %bb.9(0x30000000), %bb.4(0x50000000) # CHECK: %rax = MOV64rm %r14, 1, _, 0, _ # CHECK-NEXT: TEST64rr %rax, %rax # CHECK-NEXT: JE_1 %bb.9 -# CHECK: bb.5 -# CHECK-NEXT: successors: %bb.6(0x71555555), %bb.8(0x0eaaaaab) +# CHECK: bb.4 +# CHECK-NEXT: successors: %bb.5(0x71555555), %bb.8(0x0eaaaaab) # CHECK: CMP64mi8 killed %rax, 1, _, 8, _, 0 # CHECK-NEXT: JNE_1 %bb.8 -# CHECK: bb.6: -# CHECK-NEXT: successors: %bb.9(0x04000000), %bb.5(0x7c000000) +# CHECK: bb.5: +# CHECK-NEXT: successors: %bb.9(0x04000000), %bb.4(0x7c000000) # CHECK: %rax = MOV64rm %r14, 1, _, 0, _ # CHECK-NEXT: TEST64rr %rax, %rax -# CHECK-NEXT: JNE_1 %bb.5 +# CHECK-NEXT: JNE_1 %bb.4 name: foo body: |