Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -76,6 +76,17 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; class GISelSop2Pat < Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -92,7 +92,7 @@ bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; void initM0(MachineInstr &I) const; - bool selectG_LOAD(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + bool selectG_LOAD_ATOMICRMW(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectG_STORE(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1256,8 +1256,8 @@ } } -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I, + CodeGenCoverage &CoverageInfo) const { initM0(I); return selectImpl(I, CoverageInfo); } @@ -1377,8 +1377,18 @@ return true; return selectImpl(I, CoverageInfo); case TargetOpcode::G_LOAD: - return selectG_LOAD(I, CoverageInfo); - + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case TargetOpcode::G_ATOMICRMW_XOR: + case TargetOpcode::G_ATOMICRMW_MIN: + case TargetOpcode::G_ATOMICRMW_MAX: + case TargetOpcode::G_ATOMICRMW_UMIN: + case TargetOpcode::G_ATOMICRMW_UMAX: + return selectG_LOAD_ATOMICRMW(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: Index: lib/Target/AMDGPU/DSInstructions.td =================================================================== --- lib/Target/AMDGPU/DSInstructions.td +++ lib/Target/AMDGPU/DSInstructions.td @@ -745,7 +745,7 @@ } // End AddedComplexity = 100 class DSAtomicRetPat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), - (inst $ptr, $value, offset:$offset, (i1 gds)) + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) >; multiclass DSAtomicRetPat_mc { Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + + +--- +name: atomicrmw_xchg_s32_local +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomicrmw_xchg_s32_local + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-LABEL: name: atomicrmw_xchg_s32_local + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-LABEL: name: atomicrmw_xchg_s32_local + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + $vgpr0 = COPY %2 + +... + +--- +name: atomicrmw_xchg_s32_local_gep4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomicrmw_xchg_s32_local_gep4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-LABEL: name: atomicrmw_xchg_s32_local_gep4 + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-LABEL: name: atomicrmw_xchg_s32_local_gep4 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_CONSTANT i32 4 + %3:vgpr(p3) = G_GEP %0, %2 + %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p3), %1 :: (load store seq_cst 4, addrspace 3) + $vgpr0 = COPY %4 + +...