Index: include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -131,6 +131,8 @@ // G_STORE with a non-atomic MachineMemOperand. def : GINodeEquiv { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv { let CheckMMOIsNonAtomic = 0; } + def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -46,12 +46,19 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +// FIXME: Why are the atomic versions separated? def gi_flat_offset : GIComplexOperandMatcher, GIComplexPatternEquiv; def gi_flat_offset_signed : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_flat_signed_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; def gi_mubuf_scratch_offset : GIComplexOperandMatcher, @@ -71,6 +78,8 @@ // distinction. def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; + def : GINodeEquiv; def : GINodeEquiv; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -452,11 +452,15 @@ } // End IsLoad = 1 -let AddressSpaces = LoadAddress_local.AddrSpaces in { - -def atomic_load_32_local_m0 : LoadFrag; -def atomic_load_64_local_m0 : LoadFrag; - +let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_32_glue node:$ptr)> { + let MemoryVT = i32; +} +def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_64_glue node:$ptr)> { + let MemoryVT = i64; +} } // End let AddressSpaces = LoadAddress_local.AddrSpaces Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -0,0 +1,308 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: load_atomic_flat_s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_flat_v2s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_v2s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_flat_p3_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_p3_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_flat_s64_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_flat_v2s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_flat_v4s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_v4s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_flat_p1_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_p1_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_flat_p0_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_p0_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_flat_s32_seq_cst_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p0) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_atomic_flat_s32_seq_cst_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p0) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0) + $vgpr0 = COPY %3 + +... + Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -0,0 +1,363 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: load_atomic_global_s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[LOAD]](s32) + ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_global_v2s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_v2s16_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_global_p3_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_p3_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_global_s64_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_global_v2s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_v2s32_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_global_v4s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_v4s16_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_global_p1_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_p1_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_global_p0_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_p0_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_global_s32_seq_cst_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -2048 + ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[LOAD]](s32) + ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_atomic_global_s32_seq_cst_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095 + ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: $vgpr0 = COPY [[LOAD]](s32) + ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1) + $vgpr0 = COPY %3 + +... + Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -0,0 +1,314 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: load_atomic_local_s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_local_v2s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_v2s16_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_local_p3_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_p3_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_local_s64_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_local_v2s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_v2s32_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_local_v4s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_v4s16_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_local_p1_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_p1_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_local_p0_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_p0_seq_cst + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_atomic_local_s32_seq_cst_gep_65535 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 65535 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 3) + $vgpr0 = COPY %3 + +... Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -1644,79 +1644,3 @@ $vgpr0 = COPY %3 ... - ---- - -name: load_atomic_flat_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true - - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GFX7-LABEL: name: load_atomic_flat_s32 - ; GFX7: liveins: $vgpr0_vgpr1 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) - ; GFX7: $vgpr0 = COPY [[LOAD]](s32) - ; GFX8-LABEL: name: load_atomic_flat_s32 - ; GFX8: liveins: $vgpr0_vgpr1 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) - ; GFX8: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-LABEL: name: load_atomic_flat_s32 - ; GFX9: liveins: $vgpr0_vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) - ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; GFX10-LABEL: name: load_atomic_flat_s32 - ; GFX10: liveins: $vgpr0_vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) - ; GFX10: $vgpr0 = COPY [[LOAD]](s32) - %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 0) - $vgpr0 = COPY %1 - -... - ---- - -name: load_atomic_flat_s64 -legalized: true -regBankSelected: true -tracksRegLiveness: true - - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GFX7-LABEL: name: load_atomic_flat_s64 - ; GFX7: liveins: $vgpr0_vgpr1 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) - ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX8-LABEL: name: load_atomic_flat_s64 - ; GFX8: liveins: $vgpr0_vgpr1 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) - ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-LABEL: name: load_atomic_flat_s64 - ; GFX9: liveins: $vgpr0_vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX10-LABEL: name: load_atomic_flat_s64 - ; GFX10: liveins: $vgpr0_vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) - ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 0) - $vgpr0_vgpr1 = COPY %1 - -... Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -1584,79 +1584,3 @@ $vgpr0 = COPY %3 ... - ---- - -name: load_atomic_global_s32 -legalized: true -regBankSelected: true -tracksRegLiveness: true - - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GFX7-LABEL: name: load_atomic_global_s32 - ; GFX7: liveins: $vgpr0_vgpr1 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) - ; GFX7: $vgpr0 = COPY [[LOAD]](s32) - ; GFX8-LABEL: name: load_atomic_global_s32 - ; GFX8: liveins: $vgpr0_vgpr1 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) - ; GFX8: $vgpr0 = COPY [[LOAD]](s32) - ; GFX9-LABEL: name: load_atomic_global_s32 - ; GFX9: liveins: $vgpr0_vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) - ; GFX9: $vgpr0 = COPY [[LOAD]](s32) - ; GFX10-LABEL: name: load_atomic_global_s32 - ; GFX10: liveins: $vgpr0_vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) - ; GFX10: $vgpr0 = COPY [[LOAD]](s32) - %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 1) - $vgpr0 = COPY %1 - -... - ---- - -name: load_atomic_global_s64 -legalized: true -regBankSelected: true -tracksRegLiveness: true - - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GFX7-LABEL: name: load_atomic_global_s64 - ; GFX7: liveins: $vgpr0_vgpr1 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) - ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX8-LABEL: name: load_atomic_global_s64 - ; GFX8: liveins: $vgpr0_vgpr1 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) - ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX9-LABEL: name: load_atomic_global_s64 - ; GFX9: liveins: $vgpr0_vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - ; GFX10-LABEL: name: load_atomic_global_s64 - ; GFX10: liveins: $vgpr0_vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) - ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) - %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 1) - $vgpr0_vgpr1 = COPY %1 - -...