diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1378,16 +1378,10 @@ "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. - if (!hasDefinedInitializer(GV)) { - unsigned Offset = MFI->allocateLDSGlobal(DL, *cast(GV)); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); - } + // We ignore the initializer for now and legalize it to allow selection. + unsigned Offset = MFI->allocateLDSGlobal(DL, *cast(GV)); + return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); } - - const Function &Fn = DAG.getMachineFunction().getFunction(); - DiagnosticInfoUnsupported BadInit( - Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc()); - DAG.getContext()->diagnose(BadInit); return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2420,43 +2420,35 @@ } // TODO: We could emit code to handle the initialization somewhere. - if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) { - const SITargetLowering *TLI = ST.getTargetLowering(); - if (!TLI->shouldUseLDSConstAddress(GV)) { - MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO); - return true; // Leave in place; - } + // We ignore the initializer for now and legalize it to allow selection. + const SITargetLowering *TLI = ST.getTargetLowering(); + if (!TLI->shouldUseLDSConstAddress(GV)) { + MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO); + return true; // Leave in place; + } - if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) { - Type *Ty = GV->getValueType(); - // HIP uses an unsized array `extern __shared__ T s[]` or similar - // zero-sized type in other languages to declare the dynamic shared - // memory which size is not known at the compile time. They will be - // allocated by the runtime and placed directly after the static - // allocated ones. They all share the same offset. - if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) { - // Adjust alignment for that dynamic shared memory array. - MFI->setDynLDSAlign(B.getDataLayout(), *cast(GV)); - LLT S32 = LLT::scalar(32); - auto Sz = - B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false); - B.buildIntToPtr(DstReg, Sz); - MI.eraseFromParent(); - return true; - } + if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) { + Type *Ty = GV->getValueType(); + // HIP uses an unsized array `extern __shared__ T s[]` or similar + // zero-sized type in other languages to declare the dynamic shared + // memory which size is not known at the compile time. They will be + // allocated by the runtime and placed directly after the static + // allocated ones. They all share the same offset. + if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) { + // Adjust alignment for that dynamic shared memory array. + MFI->setDynLDSAlign(B.getDataLayout(), *cast(GV)); + LLT S32 = LLT::scalar(32); + auto Sz = + B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false); + B.buildIntToPtr(DstReg, Sz); + MI.eraseFromParent(); + return true; } - - B.buildConstant( - DstReg, - MFI->allocateLDSGlobal(B.getDataLayout(), *cast(GV))); - MI.eraseFromParent(); - return true; } - const Function &Fn = MF.getFunction(); - DiagnosticInfoUnsupported BadInit( - Fn, "unsupported initializer for address space", MI.getDebugLoc()); - Fn.getContext().diagnose(BadInit); + B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), + *cast(GV))); + MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lds-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lds-initializer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lds-initializer.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s + +; Even if we don't emit an initializer for LDS variables, +; allow compiler to legalize them instead of an abort. + +@lds = addrspace(3) global i32 10 + +define amdgpu_kernel void @test_lds_initializer() { + ; CHECK-LABEL: name: test_lds_initializer + ; CHECK: bb.1.entry: + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (volatile store (s32) into @lds, addrspace 3) + ; CHECK: S_ENDPGM 0 +entry: + store volatile i32 20, i32 addrspace(3)* @lds, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/legalize-lds-initializer.ll b/llvm/test/CodeGen/AMDGPU/legalize-lds-initializer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/legalize-lds-initializer.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck %s + +; Even if we don't emit an initializer for LDS variables, +; allow compiler to legalize them instead of an abort. + +@lds = addrspace(3) global i32 10 + +define amdgpu_kernel void @test_lds_initializer() { + ; CHECK-LABEL: name: test_lds_initializer + ; CHECK: bb.0.entry: + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: DS_WRITE_B32_gfx9 killed [[V_MOV_B32_e32_1]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile store (s32) into @lds, addrspace 3) + ; CHECK: S_ENDPGM 0 +entry: + store volatile i32 20, i32 addrspace(3)* @lds, align 4 + ret void +}