diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umax.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -march=amdgcn -run-pass=finalize-isel %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -run-pass=finalize-isel -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s --- name: uniform_value @@ -36,38 +36,44 @@ name: divergent_value machineFunctionInfo: isEntryFunction: true +tracksRegLiveness: true body: | + ; GCN-LABEL: name: divergent_value + ; GCN: bb.0.entry: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2 + ; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]] + ; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]] + ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sgpr_32 = S_MAX_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc + ; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]] + ; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MAX_U32_]] + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY2]], killed [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY1]], %bb.3 + ; GCN-NEXT: S_ENDPGM 0 bb.0.entry: liveins: $vgpr0, $sgpr0_sgpr1 - - ; GCN-LABEL: name: divergent_value - ; GCN: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 - ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; GCN-NEXT: S_BRANCH %bb.1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: .1: - ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.1 - ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %10, %bb.1 - ; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]] - ; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]] - ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sgpr_32 = S_MAX_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc - ; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]] - ; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc - ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: .2: - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MAX_U32_]] - ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY2]], killed [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 %1:sgpr_64(p4) = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0 @@ -75,6 +81,8 @@ %6:sgpr_32 = WAVE_REDUCE_UMAX_PSEUDO_U32 %0, 1, implicit $exec %7:vgpr_32 = COPY %6 GLOBAL_STORE_DWORD_SADDR killed %5, killed %7, killed %4, 0, 0, implicit $exec + bb.1: + %8:vgpr_32 = PHI %0, %bb.0 S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.reduce.umin.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -march=amdgcn -run-pass=finalize-isel %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -run-pass=finalize-isel -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s --- name: uniform_value @@ -36,38 +36,44 @@ name: divergent_value machineFunctionInfo: isEntryFunction: true +tracksRegLiveness: true body: | + ; GCN-LABEL: name: divergent_value + ; GCN: bb.0.entry: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295 + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.2 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %11, %bb.2 + ; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]] + ; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]] + ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sgpr_32 = S_MIN_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc + ; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]] + ; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MIN_U32_]] + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY2]], killed [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY1]], %bb.3 + ; GCN-NEXT: S_ENDPGM 0 bb.0.entry: liveins: $vgpr0, $sgpr0_sgpr1 - - ; GCN-LABEL: name: divergent_value - ; GCN: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 - ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295 - ; GCN-NEXT: S_BRANCH %bb.1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: .1: - ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_]], %bb.0, %4, %bb.1 - ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %10, %bb.1 - ; GCN-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sgpr_32 = S_FF1_I32_B64 [[PHI1]] - ; GCN-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sgpr_32 = V_READLANE_B32 [[COPY1]], [[S_FF1_I32_B64_]] - ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sgpr_32 = S_MIN_U32 [[PHI]], [[V_READLANE_B32_]], implicit-def $scc - ; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64_xexec = S_BITSET0_B64 [[S_FF1_I32_B64_]], [[PHI1]] - ; GCN-NEXT: S_CMP_LG_U64 [[S_BITSET0_B64_]], 0, implicit-def $scc - ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: .2: - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MIN_U32_]] - ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY2]], killed [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 %1:sgpr_64(p4) = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 36, 0 @@ -75,6 +81,8 @@ %6:sgpr_32 = WAVE_REDUCE_UMIN_PSEUDO_U32 %0, 1, implicit $exec %7:vgpr_32 = COPY %6 GLOBAL_STORE_DWORD_SADDR killed %5, killed %7, killed %4, 0, 0, implicit $exec + bb.1: + %8:vgpr_32 = PHI %0, %bb.0 S_ENDPGM 0 ...