Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -806,12 +806,10 @@
       continue;
 
     if (Reg.isPhysical()) {
-      if (MO.isUse() &&
-          (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
-        continue;
-
-      // Don't handle non-constant and non-ignorable physical register.
-      return false;
+      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+        // Don't handle non-constant and non-ignorable physical register uses.
+        return false;
+      continue;
     }
 
     // Users for the defs are all dominated by SuccToSinkTo.
Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -861,6 +861,8 @@
 ; GCN-NEXT:  .LBB5_2: ; %bb10
 ; GCN-NEXT:    ; in Loop: Header=BB5_3 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[14:15]
+; GCN-NEXT:    s_and_b64 s[6:7], exec, s[4:5]
+; GCN-NEXT:    s_or_b64 s[12:13], s[6:7], s[12:13]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[12:13]
 ; GCN-NEXT:    s_cbranch_execz .LBB5_7
@@ -873,12 +875,10 @@
 ; GCN-NEXT:  ; %bb.4: ; %bb2
 ; GCN-NEXT:    ; in Loop: Header=BB5_3 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GCN-NEXT:    s_and_b64 s[6:7], exec, s[4:5]
 ; GCN-NEXT:    s_mov_b32 s9, s8
 ; GCN-NEXT:    s_mov_b32 s10, s8
 ; GCN-NEXT:    s_mov_b32 s11, s8
 ; GCN-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NEXT:    s_or_b64 s[12:13], s[6:7], s[12:13]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s9
 ; GCN-NEXT:    v_mov_b32_e32 v2, s10
 ; GCN-NEXT:    v_mov_b32_e32 v3, s11
Index: llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; Where the mask of lanes wanting to exit the loop on this iteration is not
@@ -5,13 +6,36 @@
 ; control flow annotation), then lower control flow must insert an S_AND_B64
 ; with exec.
 
-; GCN-LABEL: {{^}}needs_and:
-
-; GCN: s_or_b64 exec, exec, [[REG1:[^ ,]*]]
-; GCN: s_andn2_b64 exec, exec, [[REG2:[^ ,]*]]
-; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1:[^ ,]*]], [[REG2:[^ ,]*]]
-; GCN: s_or_b64 exec, exec, [[REG2:[^ ,]*]]
 define void @needs_and(i32 %arg) {
+; GCN-LABEL: needs_and:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s10, 1
+; GCN-NEXT:    s_mov_b64 s[6:7], 0
+; GCN-NEXT:    s_branch .LBB0_2
+; GCN-NEXT:  .LBB0_1: ; %endif
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
+; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
+; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
+; GCN-NEXT:    s_add_i32 s10, s10, 1
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_cbranch_execz .LBB0_4
+; GCN-NEXT:  .LBB0_2: ; %loop
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
+; GCN-NEXT:    s_cbranch_execz .LBB0_1
+; GCN-NEXT:  ; %bb.3: ; %then
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
+; GCN-NEXT:    s_nop 1
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
+; GCN-NEXT:    s_branch .LBB0_1
+; GCN-NEXT:  .LBB0_4: ; %loopexit
+; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   br label %loop
 
@@ -36,12 +60,24 @@
 ; obviously already masked by exec (a V_CMP), then lower control flow can omit
 ; the S_AND_B64 to avoid an unnecessary instruction.
 
-; GCN-LABEL: {{^}}doesnt_need_and:
-; GCN: v_cmp{{[^ ]*}} [[REG1:[^ ,]*]]
-; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1]],
-; GCN: s_andn2_b64 exec, exec, [[REG2]]
-
 define void @doesnt_need_and(i32 %arg) {
+; GCN-LABEL: doesnt_need_and:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_mov_b64 s[4:5], 0
+; GCN-NEXT:  .LBB1_1: ; %loop
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_add_i32 s6, s6, 1
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_cbranch_execnz .LBB1_1
+; GCN-NEXT:  ; %bb.2: ; %loopexit
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   br label %loop
 
@@ -59,13 +95,37 @@
 ; Another case where the mask of lanes wanting to exit the loop is not masked
 ; by exec, because it is a function parameter.
 
-; GCN-LABEL: {{^}}break_cond_is_arg:
-; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
-; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
-; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
-; GCN: s_or_b64 [[REG3]], [[REG2]],
-
 define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
+; GCN-LABEL: break_cond_is_arg:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GCN-NEXT:    s_mov_b32 s10, 1
+; GCN-NEXT:    s_mov_b64 s[6:7], 0
+; GCN-NEXT:    s_branch .LBB2_2
+; GCN-NEXT:  .LBB2_1: ; %endif
+; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
+; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
+; GCN-NEXT:    s_add_i32 s10, s10, 1
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_cbranch_execz .LBB2_4
+; GCN-NEXT:  .LBB2_2: ; %loop
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s10, v0
+; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN-NEXT:    s_cbranch_execz .LBB2_1
+; GCN-NEXT:  ; %bb.3: ; %then
+; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
+; GCN-NEXT:    s_branch .LBB2_1
+; GCN-NEXT:  .LBB2_4: ; %loopexit
+; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   br label %loop
 
Index: llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -13,12 +13,12 @@
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    s_waitcnt_depctr 0xffe3
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; GCN-NEXT:    s_and_b32 s8, exec_lo, s6
+; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s7
 ; GCN-NEXT:    s_cbranch_execz .LBB0_5
 ; GCN-NEXT:  .LBB0_2: ; %bb
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    s_and_b32 s8, exec_lo, s6
-; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %bb1
Index: llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
@@ -0,0 +1,57 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \
+# RUN:   -verify-machineinstrs | FileCheck %s
+#
+# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead.
+
+--- |
+  define void @fun() { ret void }
+...
+
+# CHECK-LABEL: bb.4:
+# CHECK: %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+# CHECK-NEXT: CGHI %1, 0, implicit-def $cc
+# CHECK-NEXT: BRC 14, 6, %bb.1, implicit $cc
+# CHECK-NEXT: J %bb.5
+
+
+---
+name:            fun
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: grx32bit }
+  - { id: 3, class: gr64bit }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+
+    %2:grx32bit = LHIMux 0
+    %3:gr64bit = IMPLICIT_DEF
+
+  bb.1:
+
+    %0:gr64bit = PHI %3, %bb.0, %1, %bb.4
+
+  bb.2:
+
+    %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
+    CHIMux %2, 0, implicit-def $cc
+    BRC 14, 6, %bb.4, implicit $cc
+    J %bb.3
+
+  bb.3:
+
+  bb.4:
+
+    CGHI %1, 0, implicit-def $cc
+    BRC 14, 6, %bb.1, implicit $cc
+    J %bb.5
+
+  bb.5:
+    Return
+
+...
Index: llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
===================================================================
--- llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -102,17 +102,17 @@
 ; CHECK-NEXT:    jns .LBB0_20
 ; CHECK-NEXT:  .LBB0_5: # %a50b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r9d
 ; CHECK-NEXT:    movl %eax, %r10d
 ; CHECK-NEXT:    orl %esi, %r10d
 ; CHECK-NEXT:    jns .LBB0_26
 ; CHECK-NEXT:  .LBB0_6: # %a57b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r10d
+; CHECK-NEXT:    shrl $31, %r9d
 ; CHECK-NEXT:    testb %r9b, %r9b
 ; CHECK-NEXT:    je .LBB0_30
 ; CHECK-NEXT:  .LBB0_7: # %a66b
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    shrl $31, %r10d
 ; CHECK-NEXT:    testb %r10b, %r10b
 ; CHECK-NEXT:    jne .LBB0_8
 ; CHECK-NEXT:    .p2align 4, 0x90
Index: llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
===================================================================
--- llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -196,10 +196,8 @@
 ; CHECK-NEXT:    ja .LBB1_14
 ; CHECK-NEXT:  .LBB1_7: # %vector.body.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    leaq -4(%rcx), %r8
-; CHECK-NEXT:    movq %r8, %r11
-; CHECK-NEXT:    shrq $2, %r11
-; CHECK-NEXT:    btl $2, %r8d
+; CHECK-NEXT:    leaq -4(%rcx), %r11
+; CHECK-NEXT:    btl $2, %r11d
 ; CHECK-NEXT:    jb .LBB1_8
 ; CHECK-NEXT:  # %bb.9: # %vector.body.prol.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
@@ -208,12 +206,12 @@
 ; CHECK-NEXT:    movdqu %xmm0, (%r13,%rbp,8)
 ; CHECK-NEXT:    movdqu %xmm0, 16(%r13,%rbp,8)
 ; CHECK-NEXT:    movl $4, %r10d
-; CHECK-NEXT:    testq %r11, %r11
+; CHECK-NEXT:    shrq $2, %r11
 ; CHECK-NEXT:    jne .LBB1_11
 ; CHECK-NEXT:    jmp .LBB1_13
 ; CHECK-NEXT:  .LBB1_8: # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    xorl %r10d, %r10d
-; CHECK-NEXT:    testq %r11, %r11
+; CHECK-NEXT:    shrq $2, %r11
 ; CHECK-NEXT:    je .LBB1_13
 ; CHECK-NEXT:  .LBB1_11: # %vector.body.preheader.new
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
Index: llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
===================================================================
--- llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
+++ llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
@@ -60,10 +60,11 @@
 ; CHECK: JMP_1 %bb.4
 ; CHECK: bb.4
 ; CHECK: bb.5
-; CHECK: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
-; CHECK-LV: %3:gr64 = COPY killed %10
-; CHECK-LIS: %3:gr64 = COPY %10
-; CHECK-LV: TEST64rr killed %1, %1, implicit-def $eflags
+; CHECK: %3:gr64 = COPY %10
+; CHECK-LV: %4:gr64 = COPY killed %10
+; CHECK-LV: %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags
+; CHECK-LIS: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
+; CHECK: TEST64rr killed %1, %1, implicit-def $eflags
 ; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags
 ; CHECK: JMP_1 %bb.6
 define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {