AMDGPU/GlobalISel: legalize and select G_ATOMICRMW_FADD on global pointers

Mark {S32, GlobalPtr} G_ATOMICRMW_FADD legal when the subtarget reports
hasAtomicFaddInsts(), and select it through selectGlobalAtomicFadd(). That
helper (renamed from selectGlobalAtomicFaddIntrinsic) now receives the address
and data operands explicitly, so the amdgcn_global_atomic_fadd intrinsic
(operands 2 and 3) and the generic opcode (operands 1 and 2) share one path.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -147,7 +147,8 @@
   bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
   bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
   bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
-  bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
+  bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
+                              MachineOperand &DataOp) const;
   bool selectBVHIntrinsic(MachineInstr &I) const;
 
   std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1716,7 +1716,7 @@
   case Intrinsic::amdgcn_s_barrier:
     return selectSBarrier(I);
   case Intrinsic::amdgcn_global_atomic_fadd:
-    return selectGlobalAtomicFaddIntrinsic(I);
+    return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3));
   default: {
     return selectImpl(I, *CoverageInfo);
   }
@@ -2319,6 +2319,13 @@
 
 bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
   MachineInstr &I) const {
+  if (I.getOpcode() == TargetOpcode::G_ATOMICRMW_FADD) {
+    const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
+    unsigned AS = PtrTy.getAddressSpace();
+    if (AS == AMDGPUAS::GLOBAL_ADDRESS)
+      return selectGlobalAtomicFadd(I, I.getOperand(1), I.getOperand(2));
+  }
+
   initM0(I);
   return selectImpl(I, *CoverageInfo);
 }
@@ -2960,11 +2967,14 @@
   return true;
 }
 
-bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
-  MachineInstr &MI) const{
+bool AMDGPUInstructionSelector::selectGlobalAtomicFadd(
+    MachineInstr &MI, MachineOperand &AddrOp, MachineOperand &DataOp) const {
 
-  if (STI.hasGFX90AInsts())
+  if (STI.hasGFX90AInsts()) {
+    // gfx90a adds return versions of the global atomic fadd instructions so no
+    // special handling is required.
     return selectImpl(MI, *CoverageInfo);
+  }
 
   MachineBasicBlock *MBB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
@@ -2981,9 +2991,9 @@
   // FIXME: This is only needed because tablegen requires number of dst operands
   // in match and replace pattern to be the same. Otherwise patterns can be
   // exported from SDag path.
-  auto Addr = selectFlatOffsetImpl(MI.getOperand(2));
+  auto Addr = selectFlatOffsetImpl(AddrOp);
 
-  Register Data = MI.getOperand(3).getReg();
+  Register Data = DataOp.getReg();
   const unsigned Opc = MRI->getType(Data).isVector() ?
     AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1291,12 +1291,14 @@
     Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
   }
 
+  auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
   if (ST.hasLDSFPAtomics()) {
-    auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
-      .legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
+    Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
     if (ST.hasGFX90AInsts())
       Atomic.legalFor({{S64, LocalPtr}});
   }
+  if (ST.hasAtomicFaddInsts())
+    Atomic.legalFor({{S32, GlobalPtr}});
 
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
   // demarshalling
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -O0 -run-pass=legalizer %s -o - | FileCheck %s
+
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_fadd_global_i32)
+
+---
+name: atomicrmw_fadd_global_i32
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    ; CHECK-LABEL: name: atomicrmw_fadd_global_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s32) = COPY $sgpr2
+    %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 1)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
rename from llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir
rename to llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir