Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -38,6 +38,7 @@
 public:
   static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
   static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
+  static bool hasDefinedInitializer(const GlobalValue *GV);
 
 protected:
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1167,7 +1167,7 @@
   }
 }
 
-static bool hasDefinedInitializer(const GlobalValue *GV) {
+bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) {
   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
   if (!GVar || !GVar->hasInitializer())
     return false;
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -56,6 +56,9 @@
   bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineIRBuilder &MIRBuilder) const;
 
+  bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           MachineIRBuilder &MIRBuilder) const;
+
   Register getLiveInRegister(MachineRegisterInfo &MRI,
                              Register Reg, LLT Ty) const;
 
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 
@@ -271,6 +272,8 @@
     .legalIf(isPointer(0));
 
   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
+  getActionDefinitionsBuilder(G_GLOBAL_VALUE).customFor({LocalPtr});
+
 
   auto &FPOpActions = getActionDefinitionsBuilder(
     { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
@@ -818,6 +821,8 @@
     return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
   case TargetOpcode::G_INSERT_VECTOR_ELT:
     return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_GLOBAL_VALUE:
+    return legalizeGlobalValue(MI, MRI, MIRBuilder);
   default:
     return false;
   }
@@ -1208,6 +1213,49 @@
   return true;
 }
 
+// Custom legalization for G_GLOBAL_VALUE. For a global in the local or region
+// address space without a defined initializer, replace MI with a constant
+// holding the value returned by allocateLDSGlobal. A use from a non-entry
+// function is diagnosed as unsupported; processing then continues. A global
+// with a defined initializer is diagnosed and MI is left in place. Returns
+// false for any other address space.
+bool AMDGPULegalizerInfo::legalizeGlobalValue(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(DstReg);
+  unsigned AS = Ty.getAddressSpace();
+
+  const GlobalValue *GV = MI.getOperand(1).getGlobal();
+  MachineFunction &MF = B.getMF();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
+    B.setInstr(MI);
+
+    if (!MFI->isEntryFunction()) {
+      const Function &Fn = MF.getFunction();
+      DiagnosticInfoUnsupported BadLDSDecl(
+        Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
+      Fn.getContext().diagnose(BadLDSDecl);
+    }
+
+    // TODO: We could emit code to handle the initialization somewhere.
+    if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
+      B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
+      MI.eraseFromParent();
+      return true;
+    }
+  } else
+    return false;
+
+  const Function &Fn = MF.getFunction();
+  DiagnosticInfoUnsupported BadInit(
+    Fn, "unsupported initializer for address space", MI.getDebugLoc());
+  Fn.getContext().diagnose(BadInit);
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1733,6 +1733,7 @@
   case AMDGPU::G_FCONSTANT:
   case AMDGPU::G_CONSTANT:
   case AMDGPU::G_FRAME_INDEX:
+  case AMDGPU::G_GLOBAL_VALUE:
   case AMDGPU::G_BLOCK_ADDR: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Index: test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
@@ -0,0 +1,13 @@
+; Runs original SDAG test with -global-isel
+
+; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -o /dev/null < %S/../lds-global-non-entry-func.ll 2>&1 | FileCheck %s
+
+@lds = internal addrspace(3) global float undef, align 4
+
+; CHECK: error: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
+; CHECK-NOT: error
+; CHECK-NOT: ERROR
+define void @func_use_lds_global() {
+  store float 0.0, float addrspace(3)* @lds, align 4
+  ret void
+}
Index: test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+; TODO: Replace with existing DAG tests
+
+@lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
+@lds_4_8 = addrspace(3) global i32 undef, align 8
+
+define amdgpu_kernel void @use_lds_globals(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
+; CHECK-LABEL: use_lds_globals:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CHECK-NEXT:    s_add_u32 s2, 4, 4
+; CHECK-NEXT:    v_mov_b32_e32 v2, s2
+; CHECK-NEXT:    s_mov_b32 m0, -1
+; CHECK-NEXT:    ds_read_b32 v2, v2
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_add_u32 s0, s0, 4
+; CHECK-NEXT:    s_addc_u32 s1, s1, 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; CHECK-NEXT:    flat_store_dword v[0:1], v2
+; CHECK-NEXT:    v_mov_b32_e32 v0, 9
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    ds_write_b32 v1, v0
+; CHECK-NEXT:    s_endpgm
+entry:
+  %tmp0 = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_512_4, i32 0, i32 1
+  %tmp1 = load i32, i32 addrspace(3)* %tmp0
+  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp1, i32 addrspace(1)* %tmp2
+  store i32 9, i32 addrspace(3)* @lds_4_8
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/CodeGen/AMDGPU/GlobalISel/lds-size.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/lds-size.ll
@@ -0,0 +1 @@
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa < %S/../lds-size.ll | FileCheck -check-prefix=ALL -check-prefix=HSA %S/../lds-size.ll
Index: test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -global-isel -march=amdgcn -mcpu=tonga < %S/../lds-zero-initializer.ll 2>&1 | FileCheck %s
+
+; FIXME: Select should succeed
+; CHECK: error: <unknown>:0:0: in function load_zeroinit_lds_global void (i32 addrspace(1)*, i1): unsupported initializer for address space
+; CHECK: LLVM ERROR: cannot select: %16:sreg_32(p3) = G_GLOBAL_VALUE @lds (in function: load_zeroinit_lds_global)