Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -108,6 +108,7 @@
   bool selectDivScale(MachineInstr &MI) const;
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectBallot(MachineInstr &I) const;
+  bool selectGroupStaticSize(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -904,6 +904,8 @@
     return selectIntrinsicIcmp(I);
   case Intrinsic::amdgcn_ballot:
     return selectBallot(I);
+  case Intrinsic::amdgcn_groupstaticsize:
+    return selectGroupStaticSize(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1084,6 +1086,37 @@
   return true;
 }
 
+/// Select llvm.amdgcn.groupstaticsize as a 32-bit move: S_MOV_B32 when the
+/// destination is in the SGPR bank, V_MOV_B32_e32 otherwise. On AMDHSA and
+/// AMDPAL the function's LDS size is emitted as an immediate; on any other OS
+/// the move refers to the intrinsic's declaration with the MO_ABS32_LO flag.
+bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
+  Triple::OSType OS = MF->getTarget().getTargetTriple().getOS();
+
+  Register DstReg = I.getOperand(0).getReg();
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
+    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+
+  MachineBasicBlock *MBB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);
+
+  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
+    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+    MIB.addImm(MFI->getLDSSize());
+  } else {
+    Module *M = MF->getFunction().getParent();
+    const GlobalValue *GV
+      = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize);
+    MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+  }
+
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MESA %s
+
+---
+name: groupstaticsize_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  ldsSize: 4096
+
+body: |
+  bb.0:
+
+    ; HSAPAL-LABEL: name: groupstaticsize_v
+    ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; HSAPAL: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    ; MESA-LABEL: name: groupstaticsize_v
+    ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec
+    ; MESA: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: groupstaticsize_s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  ldsSize: 1024
+
+body: |
+  bb.0:
+
+    ; HSAPAL-LABEL: name: groupstaticsize_s
+    ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+    ; HSAPAL: S_ENDPGM 0, implicit [[S_MOV_B32_]]
+    ; MESA-LABEL: name: groupstaticsize_s
+    ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize
+    ; MESA: S_ENDPGM 0, implicit [[S_MOV_B32_]]
+    %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
+    S_ENDPGM 0, implicit %0
+...
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
@@ -2,6 +2,10 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
 
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,NOHSA %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s
+
 @lds0 = addrspace(3) global [512 x float] undef, align 4
 @lds1 = addrspace(3) global [256 x float] undef, align 4
 