Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -100,6 +100,14 @@
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
     AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
 
+  /// Return the kernarg segment pointer advanced by \p Offset.
+  Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const;
+
+  /// Replace \p MI with a load of the 32-bit kernel argument at \p Offset.
+  bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B,
+                                   uint64_t Offset,
+                                   Align Alignment = Align(4)) const;
+
   bool legalizeUnsignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineIRBuilder &B) const;
 
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
 
 #define DEBUG_TYPE "amdgpu-legalinfo"
 
@@ -2941,6 +2942,43 @@
   return true;
 }
 
+Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
+                                                     int64_t Offset) const {
+  // The kernarg segment pointer is a 64-bit constant-address-space pointer.
+  const LLT KernArgPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+  Register BasePtr = B.getMRI()->createGenericVirtualRegister(KernArgPtrTy);
+
+  // TODO: If we passed in the base kernel offset we could have a better
+  // alignment than 4, but we don't really need it.
+  if (!loadInputValue(BasePtr, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
+    llvm_unreachable("failed to find kernarg segment ptr");
+
+  auto OffsetCst = B.buildConstant(LLT::scalar(64), Offset);
+  // TODO: Should get nuw
+  return B.buildPtrAdd(KernArgPtrTy, BasePtr, OffsetCst).getReg(0);
+}
+
+/// Legalize a value that's loaded from kernel arguments. This is only used by
+/// legacy intrinsics.
+bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
+                                                      MachineIRBuilder &B,
+                                                      uint64_t Offset,
+                                                      Align Alignment) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  assert(B.getMRI()->getType(DstReg) == LLT::scalar(32) &&
+         "unexpected kernarg parameter type");
+
+  // Load with the caller-requested alignment instead of a hard-coded 4.
+  Register Ptr = getKernargParameterPtr(B, Offset);
+  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+  B.buildLoad(DstReg, Ptr, PtrInfo, Alignment,
+              MachineMemOperand::MODereferenceable |
+                  MachineMemOperand::MOInvariant);
+  // The load now defines the result, so the intrinsic call can go away.
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &B) const {
@@ -5114,6 +5152,37 @@
   case Intrinsic::amdgcn_dispatch_id:
     return legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::DISPATCH_ID);
+  case Intrinsic::r600_read_ngroups_x:
+    // TODO: Emit error for hsa
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::NGROUPS_X);
+  case Intrinsic::r600_read_ngroups_y:
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::NGROUPS_Y);
+  case Intrinsic::r600_read_ngroups_z:
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::NGROUPS_Z);
+  case Intrinsic::r600_read_local_size_x:
+    // TODO: Could insert G_ASSERT_ZEXT from s16
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::LOCAL_SIZE_X);
+  case Intrinsic::r600_read_local_size_y:
+    // TODO: Could insert G_ASSERT_ZEXT from s16
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::LOCAL_SIZE_Y);
+  case Intrinsic::r600_read_local_size_z:
+    // TODO: Could insert G_ASSERT_ZEXT from s16
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::LOCAL_SIZE_Z);
+  case Intrinsic::r600_read_global_size_x:
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::GLOBAL_SIZE_X);
+  case Intrinsic::r600_read_global_size_y:
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::GLOBAL_SIZE_Y);
+  case Intrinsic::r600_read_global_size_z:
+    return legalizeKernargMemParameter(MI, B,
+                                       SI::KernelInputOffsets::GLOBAL_SIZE_Z);
   case Intrinsic::amdgcn_fdiv_fast:
     return legalizeFDIVFastIntrin(MI, MRI, B);
   case Intrinsic::amdgcn_is_shared:
Index: llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
@@ -1,5 +1,9 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI-NOHSA,GCN-NOHSA,FUNC %s
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI-NOHSA,GCN-NOHSA,FUNC %s
+
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-NOHSA,GCN-NOHSA,FUNC %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-NOHSA,GCN-NOHSA,FUNC %s
+
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s
 
 ; Legacy intrinsics that just read implicit parameters