Index: lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- lib/CodeGen/MachineBlockPlacement.cpp
+++ lib/CodeGen/MachineBlockPlacement.cpp
@@ -452,6 +452,7 @@
   void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
                   BlockFilterSet *BlockFilter = nullptr);
+  bool hasRarePredecessors(MachineBasicBlock *Latch, MachineBasicBlock *Exit);
   MachineBasicBlock *findBestLoopTop(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
@@ -1749,16 +1750,48 @@
                << getBlockName(*Chain.begin()) << "\n");
 }
 
+// Check whether the latch block has rarer predecessors than the exit block.
+// The rare predecessor frequency is defined as the total predecessor
+// frequency minus the max predecessor frequency. It is the number of taken
+// branches saved when the latch is moved to the top of the loop.
+bool
+MachineBlockPlacement::hasRarePredecessors(MachineBasicBlock *Latch,
+                                           MachineBasicBlock *Exit)
+{
+  BlockFrequency MaxPredFreq;
+  BlockFrequency TotalPredFreq;
+  for (MachineBasicBlock *Pred : Latch->predecessors()) {
+    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+    TotalPredFreq += PredFreq;
+    if (PredFreq > MaxPredFreq)
+      MaxPredFreq = PredFreq;
+  }
+  BlockFrequency ReducedBranches = TotalPredFreq - MaxPredFreq;
+  BlockFrequency ExitFreq = MBFI->getBlockFreq(Exit);
+  return ReducedBranches < ExitFreq;
+}
+
 /// \brief Find the best loop top block for layout.
 ///
 /// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// out at the top of the loop. This looks for two patterns:
+///
+/// 1. a latch block
+///    its only successor is the loop header
+///
+///    Because such a block will always result in an unconditional jump
+///    (for the backedge), rotating it in front of the loop header is always
+///    profitable.
+///
+/// 2. a latch block
+///    it has two successors: one is the loop header, the other is an exit
+///    it has more than one predecessor
+///
+///    If it is below one of its predecessors P, only P can fall through to
+///    it; all other predecessors need a jump to it, followed by a conditional
+///    jump to the loop header. If it is moved before the loop header, all its
+///    predecessors jump to it and then fall through to the loop header. So
+///    all its predecessors except P save one taken branch.
 MachineBasicBlock *
 MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
                                        const BlockFilterSet &LoopBlockSet) {
@@ -1790,9 +1823,26 @@
     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has "
                  << Pred->succ_size() << " successors, ";
           MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
-    if (Pred->succ_size() > 1)
+    if (Pred->succ_size() > 2)
       continue;
 
+    if (Pred->succ_size() == 2) {
+      // The candidate should have an exit edge.
+      MachineBasicBlock *OutBB = *Pred->succ_begin();
+      if (OutBB == L.getHeader())
+        OutBB = *Pred->succ_rbegin();
+      if (LoopBlockSet.count(OutBB))
+        continue;
+
+      // It must have more than one predecessor.
+ if (Pred->pred_size() == 1) + continue; + + // Move the latch to top must reduce taken branches. + if (hasRarePredecessors(Pred, OutBB)) + continue; + } + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); if (!BestPred || PredFreq > BestPredFreq || (!(PredFreq < BestPredFreq) && Index: test/CodeGen/AArch64/neg-imm.ll =================================================================== --- test/CodeGen/AArch64/neg-imm.ll +++ test/CodeGen/AArch64/neg-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s ; LSR used to pick a sub-optimal solution due to the target responding ; conservatively to isLegalAddImmediate for negative values. Index: test/CodeGen/AArch64/tailmerging_in_mbp.ll =================================================================== --- test/CodeGen/AArch64/tailmerging_in_mbp.ll +++ test/CodeGen/AArch64/tailmerging_in_mbp.ll @@ -1,9 +1,8 @@ ; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s ; CHECK-LABEL: test: -; CHECK: LBB0_7: -; CHECK: b.hi -; CHECK-NEXT: b +; CHECK-LABEL: %cond.false12.i +; CHECK: b.gt ; CHECK-NEXT: LBB0_8: ; CHECK-NEXT: mov x8, x9 ; CHECK-NEXT: LBB0_9: Index: test/CodeGen/AMDGPU/branch-uniformity.ll =================================================================== --- test/CodeGen/AMDGPU/branch-uniformity.ll +++ test/CodeGen/AMDGPU/branch-uniformity.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: {{^}}main: ; CHECK: ; %LOOP49 ; CHECK: v_cmp_ne_u32_e32 vcc, -; CHECK: s_cbranch_vccnz +; CHECK: s_cbranch_vccz ; CHECK: ; %ENDIF53 define amdgpu_vs float @main(i32 %in) { main_body: Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -205,6 +205,11 @@ ; Make sure scc liveness is updated if sor_b64 is removed ; GCN-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -215,10 +220,6 @@ ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword Index: test/CodeGen/AMDGPU/global_smrd_cfg.ll =================================================================== --- test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; 
##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: Index: test/CodeGen/AMDGPU/hoist-cond.ll =================================================================== --- test/CodeGen/AMDGPU/hoist-cond.ll +++ test/CodeGen/AMDGPU/hoist-cond.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s ; Check that invariant compare is hoisted out of the loop. ; At the same time condition shall not be serialized into a VGPR and deserialized later Index: test/CodeGen/AMDGPU/loop_break.ll =================================================================== --- test/CodeGen/AMDGPU/loop_break.ll +++ test/CodeGen/AMDGPU/loop_break.ll @@ -1,5 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break Index: test/CodeGen/AMDGPU/madmk.ll =================================================================== --- test/CodeGen/AMDGPU/madmk.ll +++ test/CodeGen/AMDGPU/madmk.ll @@ -186,9 +186,9 @@ } ; SI-LABEL: {{^}}kill_madmk_verifier_error: +; SI: s_or_b64 ; SI: s_xor_b64 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} -; SI: s_or_b64 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind { bb: br label %bb2 Index: test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- test/CodeGen/AMDGPU/multilevel-break.ll +++ test/CodeGen/AMDGPU/multilevel-break.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s ; OPT-LABEL: {{^}}define amdgpu_vs void @multi_else_break( ; OPT: main_body: Index: test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- test/CodeGen/AMDGPU/valu-i1.ll +++ test/CodeGen/AMDGPU/valu-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/ARM/code-placement.ll =================================================================== --- test/CodeGen/ARM/code-placement.ll +++ test/CodeGen/ARM/code-placement.ll @@ -38,8 +38,9 @@ br i1 %0, label %bb5, label %bb.nph15 bb1: ; preds = %bb2.preheader, %bb1 +; CHECK: LBB1_[[BB3:.]]: @ %bb3 ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader -; CHECK: blt LBB1_[[BB3:.]] +; CHECK: blt LBB1_[[BB3]] %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; [#uses=2] %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; [#uses=1] @@ -53,7 +54,6 @@ 
bb3: ; preds = %bb1, %bb2.preheader ; CHECK: LBB1_[[BB1:.]]: @ %bb1 ; CHECK: bne LBB1_[[BB1]] -; CHECK: LBB1_[[BB3]]: @ %bb3 %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; [#uses=2] %3 = add i32 %pass.011, 1 ; [#uses=2] %exitcond18 = icmp eq i32 %3, %passes ; [#uses=1] Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -183,7 +183,7 @@ ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb r{{.*}}, [r0, #8] -; CHECK-APPLE: ble +; CHECK-APPLE: b ; CHECK-APPLE: mov r8, [[ID]] ; CHECK-O0-LABEL: foo_loop: Index: test/CodeGen/PowerPC/cmp_elimination.ll =================================================================== --- test/CodeGen/PowerPC/cmp_elimination.ll +++ test/CodeGen/PowerPC/cmp_elimination.ll @@ -718,13 +718,14 @@ define void @func28(i32 signext %a) { ; CHECK-LABEL: @func28 ; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]] -; CHECK: .[[LABEL1:[A-Z0-9_]+]]: +; CHECK: .[[LABEL2:[A-Z0-9_]+]]: +; CHECK: cmpwi [[REG1]], [[REG2]] +; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp -; CHECK: bne 0, .[[LABEL2:[A-Z0-9_]+]] +; CHECK: bne 0, .[[LABEL2]] ; CHECK: bl dummy1 -; CHECK: .[[LABEL2]]: -; CHECK: cmpwi [[REG1]], [[REG2]] -; CHECK: bgt 0, .[[LABEL1]] +; CHECK: b .[[LABEL2]] +; CHECK: .[[LABEL1]]: ; CHECK: blr entry: br label %do.body Index: test/CodeGen/SystemZ/atomicrmw-minmax-01.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-01.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-01.ll @@ -1,8 +1,8 @@ ; Test 8-bit atomic min/max operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2 ; Check signed minimum. ; - CHECK is for the main loop. Index: test/CodeGen/SystemZ/atomicrmw-minmax-02.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-02.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-02.ll @@ -1,8 +1,8 @@ ; Test 8-bit atomic min/max operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2 ; Check signed minimum. ; - CHECK is for the main loop. Index: test/CodeGen/SystemZ/loop-01.ll =================================================================== --- test/CodeGen/SystemZ/loop-01.ll +++ test/CodeGen/SystemZ/loop-01.ll @@ -1,7 +1,7 @@ ; Test loop tuning. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-block-placement | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-block-placement \ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13 ; Test that strength reduction is applied to addresses with a scale factor, Index: test/CodeGen/SystemZ/loop-02.ll =================================================================== --- test/CodeGen/SystemZ/loop-02.ll +++ test/CodeGen/SystemZ/loop-02.ll @@ -1,7 +1,7 @@ ; Test BRCTH. ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \ -; RUN: -no-integrated-as | FileCheck %s +; RUN: -no-integrated-as -disable-block-placement | FileCheck %s ; Test a loop that should be converted into dbr form and then use BRCTH. define void @f2(i32 *%src, i32 *%dest) { Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu| FileCheck %s -; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s +; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s declare i8* @malloc(i64) declare void @free(i8*) Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -124,7 +124,7 @@ ret i32 %sum } -!0 = !{!"branch_weights", i32 4, i32 64} +!0 = !{!"branch_weights", i32 1, i32 64} define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. 
@@ -961,11 +961,11 @@ ; CHECK: %while.cond.outer ; Third rotated loop top ; CHECK: .p2align +; CHECK: %if.end20 ; CHECK: %while.cond ; CHECK: %while.body ; CHECK: %land.lhs.true ; CHECK: %if.then19 -; CHECK: %if.end20 ; CHECK: %if.then8 ; CHECK: ret Index: test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir =================================================================== --- test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir +++ test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir @@ -27,12 +27,13 @@ # return result; # } # -# CHECK: 49: eb 4a jmp 74 -# CHECK: 57: eb 3c jmp 60 -# CHECK: 65: eb 2e jmp 46 -# CHECK: 73: eb 20 jmp 32 -# CHECK: 81: eb 12 jmp 18 -# CHECK: 93: 7f 8b jg -117 +# CHECK: 35: 77 e9 ja -23 +# CHECK: 45: eb d9 jmp -39 +# CHECK: 4a: eb d4 jmp -44 +# CHECK: 4f: eb cf jmp -49 +# CHECK: 54: eb ca jmp -54 +# CHECK: 59: eb c5 jmp -59 +# CHECK: 64: eb ba jmp -70 # Test 2: # @@ -57,11 +58,11 @@ # return w; # } # -# CHECK: 129: eb 13 jmp 19 -# CHECK: 12e: eb a0 jmp -96 -# CHECK: 132: eb 9c jmp -100 -# CHECK: 137: eb 97 jmp -105 -# CHECK: 13c: eb 92 jmp -110 +# CHECK: f9: eb 13 jmp 19 +# CHECK: fe: eb a0 jmp -96 +# CHECK: 102: eb 9c jmp -100 +# CHECK: 107: eb 97 jmp -105 +# CHECK: 10c: eb 92 jmp -110 --- | ; ModuleID = 'D:\iusers\opaparo\dev_test\branch_instruction_and_target_split_perf_nops.ll' source_filename = "D:\5C\5Ciusers\5C\5Copaparo\5C\5Cdev_test\5C\5Cbranch_instruction_and_target_split_perf_nops.c" Index: test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll =================================================================== --- test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll +++ test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -1,13 +1,12 @@ ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s define void @foo() { -; Test that when determining the edge probability from a node in an inner loop -; to a node in an outer loop, the weights on edges in the inner loop should be -; ignored if we are building the chain for the outer loop. +; After moving the latch to the top of loop, there is no fall through from the +; latch to outer loop. ; ; CHECK-LABEL: foo: -; CHECK: callq c ; CHECK: callq b +; CHECK: callq c entry: %call = call zeroext i1 @a() Index: test/CodeGen/X86/code_placement_loop_rotation2.ll =================================================================== --- test/CodeGen/X86/code_placement_loop_rotation2.ll +++ test/CodeGen/X86/code_placement_loop_rotation2.ll @@ -5,13 +5,13 @@ ; Test a nested loop case when profile data is not available. 
; ; CHECK-LABEL: foo: +; CHECK: callq h ; CHECK: callq b +; CHECK: callq g +; CHECK: callq f ; CHECK: callq c ; CHECK: callq d ; CHECK: callq e -; CHECK: callq f -; CHECK: callq g -; CHECK: callq h entry: br label %header Index: test/CodeGen/X86/pr5145.ll =================================================================== --- test/CodeGen/X86/pr5145.ll +++ test/CodeGen/X86/pr5145.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s +; RUN: llc -disable-block-placement -mtriple=x86_64-- < %s | FileCheck %s @sc8 = external global i8 define void @atomic_maxmin_i8() { Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-O0 %s -; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-APPLE %s +; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-i386 %s declare i8* @malloc(i64) declare void @free(i8*) Index: test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -73,11 +73,11 @@ ; CHECK-LABEL: loop_shared_header ; CHECK: # %entry ; CHECK: # %shared_preheader +; CHECK: # %outer_loop_latch ; CHECK: # %shared_loop_header ; CHECK: # %inner_loop_body ; CHECK: # %outer_loop_latch ; CHECK: # %merge_predecessor_split -; CHECK: # %outer_loop_latch ; CHECK: # %cleanup define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 { entry: Index: test/CodeGen/X86/tail-dup-repeat.ll =================================================================== --- test/CodeGen/X86/tail-dup-repeat.ll +++ test/CodeGen/X86/tail-dup-repeat.ll @@ -12,7 +12,9 @@ entry: br label %for.cond -; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond +; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %dup1 + +; CHECK: # %for.cond for.cond: ; preds = %dup1, %entry br i1 %a1, label %land.lhs.true, label %if.end56 @@ -28,11 +30,9 @@ br label %dup1 ; CHECK: # %if.end70 -; CHECK-NEXT: # in Loop: ; CHECK-NEXT: movl $12, (%rdx) -; CHECK-NEXT: movl $2, (%rcx) -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .[[HEADER]] +; CHECK: movl $2, (%rcx) +; CHECK-NEXT: jmp .[[HEADER]] if.end70: ; preds = %if.end56 store i32 12, i32* %a4, align 8 br label %dup2 Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck %s 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; This test checks that x86-cmov-converter optimization transform CMOV Index: test/DebugInfo/X86/dbg-value-transfer-order.ll =================================================================== --- test/DebugInfo/X86/dbg-value-transfer-order.ll +++ test/DebugInfo/X86/dbg-value-transfer-order.ll @@ -24,6 +24,12 @@ ; with the Orders insertion point vector. ; CHECK-LABEL: f: # @f +; CHECK: .LBB0_3: +; Check that this DEBUG_VALUE comes before the left shift. +; CHECK: #DEBUG_VALUE: bit_offset <- $ecx +; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 +; CHECK: movl $1, %[[reg:[^ ]*]] +; CHECK: shll %cl, %[[reg]] ; CHECK: .LBB0_1: # %while.body ; CHECK: movl $32, %ecx ; CHECK: testl {{.*}} @@ -31,12 +37,7 @@ ; CHECK: # %bb.2: # %if.then ; CHECK: callq if_then ; CHECK: movl %eax, %ecx -; CHECK: .LBB0_3: # %if.end -; Check that this DEBUG_VALUE comes before the left shift. -; CHECK: #DEBUG_VALUE: bit_offset <- $ecx -; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 -; CHECK: movl $1, %[[reg:[^ ]*]] -; CHECK: shll %cl, %[[reg]] +; CHECK: jmp .LBB0_3 ; ModuleID = 't.c' source_filename = "t.c"
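Editorial note, not part of the patch above: a minimal, illustrative C++ sketch of the loop shape that pattern 2 in findBestLoopTop targets. The function and names below are invented for the example, and the exact machine CFG a compiler builds depends on the optimization pipeline.

// Illustrative only. The loop latch (the ++i / i < n block) is the join
// point of the if/else, so it has two in-loop predecessors plus a
// conditional exit edge -- the shape handled by pattern 2.
int sum_signs(const int *a, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    if (a[i] > 0)       // both arms of this branch feed the latch block
      sum += a[i];
    else
      sum -= a[i];
  }
  // With the latch laid out at the bottom, one arm falls through into it and
  // the back edge up to the header is a taken branch each time the loop
  // repeats. With the latch rotated to the top, both arms branch to it, the
  // back edge falls through into the header, and only the loop exit is a
  // taken branch.
  return sum;
}

The hasRarePredecessors heuristic weighs exactly this trade-off: the taken branches saved (total predecessor frequency minus the hottest predecessor's frequency) against the exit edge that becomes a taken branch, and the rotation is skipped when the saving is smaller than the exit frequency.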