Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -1062,6 +1062,11 @@ if (MI.isImplicitDef()) return true; + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(MI, AA)) + return true; + // Besides removing computation from the loop, hoisting an instruction has // these effects: // @@ -1083,11 +1088,6 @@ return false; } - // Rematerializable instructions should always be hoisted since the register - // allocator can just pull them down again when needed. - if (TII->isTriviallyReMaterializable(MI, AA)) - return true; - // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { Index: test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- test/CodeGen/AArch64/cmpxchg-idioms.ll +++ test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -4,17 +4,13 @@ ; CHECK-LABEL: test_return: ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0] +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x8] ; CHECK: cmp [[LOADED]], w1 ; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x8] ; CHECK: cbnz [[STATUS]], [[LOOP]] -; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: orr w0, wzr, #0x1 -; CHECK: ret - ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} ; CHECK: mov w0, wzr @@ -38,10 +34,6 @@ ; CHECK: cbnz [[STATUS]], [[LOOP]] ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} - ; FIXME: DAG combine should be able to deal with this. -; CHECK: orr [[TMP:w[0-9]+]], wzr, #0x1 -; CHECK: eor w0, [[TMP]], #0x1 -; CHECK: ret ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} Index: test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll =================================================================== --- test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll +++ test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll @@ -16,7 +16,6 @@ for.body: ; CHECK: %for. ; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}} -; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}} ; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}} %arrayidx = getelementptr i32, i32* %A, i32 %0 %tmp4 = load i32, i32* %arrayidx, align 4 Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -35,17 +35,18 @@ ; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV6-NEXT: .fnstart +; CHECK-ARMV6-NEXT: mov [[REG:r[0-9]+]], r0 ; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 +; CHECK-ARMV6-NEXT: mov r0, #0 ; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]: -; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0] +; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], {{\[}}[[REG]]{{\]}} ; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0 ; CHECK-ARMV6-NEXT: bxne lr -; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] +; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, {{\[}}[[REG]]{{\]}} ; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV6-NEXT: moveq [[RES]], #1 -; CHECK-ARMV6-NEXT: bxeq lr -; CHECK-ARMV6-NEXT: b [[TRY]] +; CHECK-ARMV6-NEXT: bne [[TRY]] +; CHECK-ARMV6-NEXT: mov r0, #1 +; CHECK-ARMV6-NEXT: bx lr ; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8: ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 @@ -61,33 +62,35 @@ ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV7-NEXT: .fnstart +; CHECK-ARMV7-NEXT: mov [[REG:r[0-9]+]], r0 ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 +; CHECK-ARMV7-NEXT: mov r0, #1 ; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]] ; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: -; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] +; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, {{\[}}[[REG]]{{\]}} ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 ; CHECK-ARMV7-NEXT: bxeq lr ; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] +; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], {{\[}}[[REG]]{{\]}} ; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] ; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: mov [[RES]], #0 +; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: ; CHECK-THUMBV7-NEXT: .fnstart -; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 +; CHECK-THUMBV7-NEXT: mov [[REG:r[0-9]+]], r0 +; CHECK-THUMBV7-NEXT: uxtb{{.*}} [[DESIRED:r[0-9]+]], r1 +; CHECK-THUMBV7-NEXT: movs r0, #1 ; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]] ; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]: -; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] +; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, {{\[}}[[REG]]{{\]}} ; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-THUMBV7-NEXT: itt eq -; CHECK-THUMBV7-NEXT: moveq r0, #1 +; CHECK-THUMBV7-NEXT: it eq ; CHECK-THUMBV7-NEXT: bxeq lr ; CHECK-THUMBV7-NEXT: [[TRYLD]]: -; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] +; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], {{\[}}[[REG]]{{\]}} ; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]] ; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]] ; CHECK-THUMBV7-NEXT: clrex Index: test/CodeGen/X86/licm-nested.ll =================================================================== --- test/CodeGen/X86/licm-nested.ll +++ test/CodeGen/X86/licm-nested.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 6 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. Index: test/CodeGen/X86/loop-search.ll =================================================================== --- test/CodeGen/X86/loop-search.ll +++ test/CodeGen/X86/loop-search.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s ; This test comes from PR27136 @@ -10,19 +10,20 @@ ; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: jle LBB0_1 ; CHECK-NEXT: ## BB#4: ## %for.body.preheader -; CHECK-NEXT: movslq %edx, %rax -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movslq %edx, %rcx +; ###### This loop invariant was hoisted from the for body ###### +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_5: ## %for.body ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4) +; CHECK-NEXT: cmpl %edi, (%rsi,%rdx,4) ; CHECK-NEXT: je LBB0_6 ; CHECK-NEXT: ## BB#2: ## %for.cond ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: incq %rcx -; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: cmpq %rcx, %rdx ; CHECK-NEXT: jl LBB0_5 -; ### FIXME: BB#3 and LBB0_1 should be merged ; CHECK-NEXT: ## BB#3: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: %AL %AL %EAX @@ -31,11 +32,9 @@ ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq -; CHECK-NEXT: LBB0_6: -; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: LBB0_6: ## %cleanup ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq -; entry: %cmp5 = icmp sgt i32 %count, 0 br i1 %cmp5, label %for.body.preheader, label %cleanup Index: test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -75,7 +75,6 @@ ; CHECK: # %shared_preheader ; CHECK: # %shared_loop_header ; CHECK: # %inner_loop_body -; CHECK: # %outer_loop_latch ; CHECK: # %merge_predecessor_split ; CHECK: # %outer_loop_latch ; CHECK: # %cleanup