Index: llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -71,7 +71,7 @@ static bool visitConstantExpr(const ConstantExpr *CE); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited); + SmallPtrSet &ConstantExprVisited, bool IsFunc); }; } // end anonymous namespace @@ -93,6 +93,14 @@ return castRequiresQueuePtr(ASC->getSrcAddressSpace()); } +static bool isDSAddress(const Constant *C) { + const GlobalValue *GV = dyn_cast(C); + if (!GV) + return false; + unsigned AS = GV->getAddressSpace(); + return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; +} + bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); @@ -104,7 +112,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited) { + SmallPtrSet &ConstantExprVisited, + bool IsFunc) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -115,6 +124,10 @@ while (!Stack.empty()) { const Constant *C = Stack.pop_back_val(); + // We need to trap on DS globals in non-entry functions. + if (IsFunc && isDSAddress(C)) + return true; + // Check this constant expression. if (const auto *CE = dyn_cast(C)) { if (visitConstantExpr(CE)) @@ -301,7 +314,7 @@ } } - if (NeedQueuePtr || HasApertureRegs) + if (NeedQueuePtr || (!IsFunc && HasApertureRegs)) continue; if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { @@ -316,7 +329,7 @@ if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) { + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc)) { NeedQueuePtr = true; break; } Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1238,10 +1238,23 @@ if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isEntryFunction()) { + SDLoc DL(Op); const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported BadLDSDecl( - Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc()); + Fn, "local memory global used by non-kernel function", + DL.getDebugLoc(), DS_Warning); DAG.getContext()->diagnose(BadLDSDecl); + + // We currently don't have a way to correctly allocate LDS objects that + // aren't directly associated with a kernel. We do force inlining of + // functions that use local objects. However, if these dead functions are + // not eliminated, we don't want a compile time error. Just emit a warning + // and a trap, since there should be no callable path here. + SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode()); + SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Trap, DAG.getRoot()); + DAG.setRoot(OutputChain); + return DAG.getUNDEF(Op.getValueType()); } // XXX: What does the value of G->getOffset() mean? Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1967,8 +1967,19 @@ if (!MFI->isEntryFunction()) { const Function &Fn = MF.getFunction(); DiagnosticInfoUnsupported BadLDSDecl( - Fn, "local memory global used by non-kernel function", MI.getDebugLoc()); + Fn, "local memory global used by non-kernel function", MI.getDebugLoc(), + DS_Warning); Fn.getContext().diagnose(BadLDSDecl); + + // We currently don't have a way to correctly allocate LDS objects that + // aren't directly associated with a kernel. We do force inlining of + // functions that use local objects. However, if these dead functions are + // not eliminated, we don't want a compile time error. Just emit a warning + // and a trap, since there should be no callable path here. + B.buildIntrinsic(Intrinsic::trap, ArrayRef(), true); + B.buildUndef(DstReg); + MI.eraseFromParent(); + return true; } // TODO: We could emit code to handle the initialization somewhere. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -1,13 +1,57 @@ -; Runs original SDAG test with -global-isel +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: FileCheck -check-prefix=ERR %s < %t -; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -o /dev/null < %S/../lds-global-non-entry-func.ll 2>&1 | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: FileCheck -check-prefix=ERR %s < %t @lds = internal addrspace(3) global float undef, align 4 -; CHECK: error: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function -; CHECK-NOT: error -; CHECK-NOT: ERROR +; ERR: warning: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function define void @func_use_lds_global() { +; GFX8-LABEL: func_use_lds_global: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: ds_write_b32 v0, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: ds_write_b32 v0, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store float 0.0, float addrspace(3)* @lds, align 4 ret void } + +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast() { +; GFX8-LABEL: func_use_lds_global_constexpr_cast: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: flat_store_dword v[0:1], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global_constexpr_cast: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + ret void +} Index: llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -1,9 +1,46 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: FileCheck -check-prefix=ERR %s < %t + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: FileCheck -check-prefix=ERR %s < %t @lds = internal addrspace(3) global float undef, align 4 -; CHECK: error: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function +; ERR: warning: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function define void @func_use_lds_global() { +; GFX8-LABEL: func_use_lds_global: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_write_b32 v0, v0 +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: ds_write_b32 v0, v0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store float 0.0, float addrspace(3)* @lds, align 4 ret void } + +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast() { +; GCN-LABEL: func_use_lds_global_constexpr_cast: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[0:1], s[4:5] +; GCN-NEXT: s_trap 2 +; GCN-NEXT: s_setpc_b64 s[30:31] + store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + ret void +}