diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir --- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -1,171 +1,87 @@ -# RUN: llc -run-pass si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s ---- | - - define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 { - entry: - %cmp0 = fcmp oeq float %cond, 0.000000e+00 - br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 - - else: ; preds = %entry - store volatile i32 100, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - if: ; preds = %entry - store volatile i32 9, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - done: ; preds = %if, %else - %value = phi i32 [ 0, %if ], [ 1, %else ] - store i32 %value, i32 addrspace(1)* %out - ret void - } - - define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { - entry: - br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 - - else: ; preds = %entry - store volatile i32 100, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - if: ; preds = %entry - store volatile i32 9, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - done: ; preds = %if, %else - %value = phi i32 [ 0, %if ], [ 1, %else ] - store i32 %value, i32 addrspace(1)* %out - ret void - } - - attributes #0 = { nounwind } - attributes #1 = { readnone } - - !0 = !{} - -... +# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI +# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9 +# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10 --- # CHECK-LABEL: name: vccz_corrupt_workaround # CHECK: $vcc = V_CMP_EQ_F32 -# CHECK-NEXT: S_WAITCNT 127 -# CHECK-NEXT: $vcc = S_MOV_B64 $vcc +# SI-NEXT: S_WAITCNT 127 +# SI-NEXT: $vcc = S_MOV_B64 $vcc # CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc -name: vccz_corrupt_workaround -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false +name: vccz_corrupt_workaround tracksRegLiveness: true -liveins: - - { reg: '$sgpr0_sgpr1' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $sgpr0_sgpr1, $vcc - - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - bb.2.if: + bb.2: liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr0 = V_MOV_B32_e32 0, implicit $exec S_BRANCH %bb.3 - bb.1.else: + bb.1: liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr0 = V_MOV_B32_e32 1, implicit $exec - bb.3.done: + bb.3: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- # CHECK-LABEL: name: vccz_corrupt_undef_vcc -# CHECK: S_WAITCNT 3855 +# CHECK: BUFFER_STORE_DWORD_OFFSET +# SI-NEXT: S_WAITCNT 3855 # CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 -name: vccz_corrupt_undef_vcc -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false +name: vccz_corrupt_undef_vcc tracksRegLiveness: true -liveins: - - { reg: '$sgpr0_sgpr1' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: +body: | + bb.0: liveins: $sgpr0_sgpr1 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 S_CBRANCH_VCCZ %bb.1, implicit undef $vcc - bb.2.if: + bb.2: liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr0 = V_MOV_B32_e32 0, implicit $exec S_BRANCH %bb.3 - bb.1.else: + bb.1: liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr0 = V_MOV_B32_e32 1, implicit $exec - bb.3.done: + bb.3: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ...