diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1378,16 +1378,11 @@ "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. - if (!hasDefinedInitializer(GV)) { - unsigned Offset = MFI->allocateLDSGlobal(DL, *cast(GV)); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); - } + // We ignore the initializer for now and legalize it to allow selection. + // The initializer will anyway get errored out during assembly emission. + unsigned Offset = MFI->allocateLDSGlobal(DL, *cast(GV)); + return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); } - - const Function &Fn = DAG.getMachineFunction().getFunction(); - DiagnosticInfoUnsupported BadInit( - Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc()); - DAG.getContext()->diagnose(BadInit); return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2420,43 +2420,36 @@ } // TODO: We could emit code to handle the initialization somewhere. - if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) { - const SITargetLowering *TLI = ST.getTargetLowering(); - if (!TLI->shouldUseLDSConstAddress(GV)) { - MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO); - return true; // Leave in place; - } + // We ignore the initializer for now and legalize it to allow selection. + // The initializer will anyway get errored out during assembly emission. + const SITargetLowering *TLI = ST.getTargetLowering(); + if (!TLI->shouldUseLDSConstAddress(GV)) { + MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO); + return true; // Leave in place; + } - if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) { - Type *Ty = GV->getValueType(); - // HIP uses an unsized array `extern __shared__ T s[]` or similar - // zero-sized type in other languages to declare the dynamic shared - // memory which size is not known at the compile time. They will be - // allocated by the runtime and placed directly after the static - // allocated ones. They all share the same offset. - if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) { - // Adjust alignment for that dynamic shared memory array. - MFI->setDynLDSAlign(B.getDataLayout(), *cast(GV)); - LLT S32 = LLT::scalar(32); - auto Sz = - B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false); - B.buildIntToPtr(DstReg, Sz); - MI.eraseFromParent(); - return true; - } + if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) { + Type *Ty = GV->getValueType(); + // HIP uses an unsized array `extern __shared__ T s[]` or similar + // zero-sized type in other languages to declare the dynamic shared + // memory which size is not known at the compile time. They will be + // allocated by the runtime and placed directly after the static + // allocated ones. They all share the same offset. + if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) { + // Adjust alignment for that dynamic shared memory array. + MFI->setDynLDSAlign(B.getDataLayout(), *cast(GV)); + LLT S32 = LLT::scalar(32); + auto Sz = + B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false); + B.buildIntToPtr(DstReg, Sz); + MI.eraseFromParent(); + return true; } - - B.buildConstant( - DstReg, - MFI->allocateLDSGlobal(B.getDataLayout(), *cast(GV))); - MI.eraseFromParent(); - return true; } - const Function &Fn = MF.getFunction(); - DiagnosticInfoUnsupported BadInit( - Fn, "unsupported initializer for address space", MI.getDebugLoc()); - Fn.getContext().diagnose(BadInit); + B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), + *cast(GV))); + MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -1,3 +1,38 @@ -; RUN: not llc -global-isel -march=amdgcn -mcpu=tonga < %S/../lds-zero-initializer.ll 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s -; CHECK: :0: error: lds: unsupported initializer for address space +; RUN: not llc -march=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s + +; CHECK: error: lds: unsupported initializer for address space + +@lds = addrspace(3) global [256 x i32] zeroinitializer + +define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) { + ; GCN-LABEL: name: load_zeroinit_lds_global + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 40 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @lds + ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 36, 0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GCN: $m0 = S_MOV_B32 -1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX8: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 0, 0, implicit $m0, implicit $exec + ; GFX9: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 40, 0, implicit $m0, implicit $exec + ; GFX8: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GFX8: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_LOAD_DWORDX2_IMM]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX8: BUFFER_STORE_DWORD_OFFSET [[DS_READ_B32_]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX9: FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: S_ENDPGM 0 + %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 + %ld = load i32, i32 addrspace(3)* %gep + store i32 %ld, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll @@ -1,11 +1,32 @@ -; RUN: not llc -march=amdgcn -mcpu=tahiti -filetype=null < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga -filetype=null < %s 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -march=amdgcn -mcpu=tonga -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; RUN: not llc -march=amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: error: lds: unsupported initializer for address space @lds = addrspace(3) global [256 x i32] zeroinitializer define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) { + ; GCN-LABEL: name: load_zeroinit_lds_global + ; GCN: bb.0 (%ir-block.0): + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 + ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec + ; GCN: SI_INIT_M0 -1, implicit-def $m0 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 killed [[V_MOV_B32_e32_]], 40, 0, implicit $m0, implicit $exec + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8: BUFFER_STORE_DWORD_OFFSET killed [[DS_READ_B32_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: FLAT_STORE_DWORD killed [[COPY1]], killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: S_ENDPGM 0 %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 %ld = load i32, i32 addrspace(3)* %gep store i32 %ld, i32 addrspace(1)* %out