Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -383,6 +383,9 @@ // A uniform kernel return that terminates the wavefront. ENDPGM, + // s_endpgm for a trap. May sit mid-block; the custom inserter splits there. + ENDPGM_TRAP, + // Return to a shader part's epilog code. RETURN_TO_EPILOG, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4607,6 +4607,7 @@ NODE_NAME_CASE(RET_GLUE) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) + NODE_NAME_CASE(ENDPGM_TRAP) NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) NODE_NAME_CASE(SETCC) Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -39,6 +39,7 @@ [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>] >; +def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def AMDGPUIfOp : SDTypeProfile<1, 2, @@ -352,6 +353,8 @@ //===----------------------------------------------------------------------===// def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; +def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone, + [SDNPHasChain]>; def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5661,7 +5661,29 @@ bool 
AMDGPULegalizerInfo::legalizeTrapEndpgm( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.buildInstr(AMDGPU::S_ENDPGM).addImm(0); + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock &BB = B.getMBB(); + MachineFunction *MF = BB.getParent(); + + if (BB.succ_empty() && std::next(MI.getIterator()) == BB.end()) { + BuildMI(BB, BB.end(), DL, B.getTII().get(AMDGPU::S_ENDPGM)) + .addImm(0); + MI.eraseFromParent(); + return true; + } + + // The real S_ENDPGM must be a terminator, so split the block at the trap and + // branch to a new block that holds it. Deleting to the end of the block + // instead would break phis in successor blocks. + BB.splitAt(MI, false /*UpdateLiveIns*/); + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + MF->push_back(TrapBB); + BuildMI(*TrapBB, TrapBB->end(), DL, B.getTII().get(AMDGPU::S_ENDPGM)) + .addImm(0); + BuildMI(BB, &MI, DL, B.getTII().get(AMDGPU::S_CBRANCH_EXECNZ)) + .addMBB(TrapBB); + + BB.addSuccessor(TrapBB); MI.eraseFromParent(); return true; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4549,6 +4549,30 @@ MI.eraseFromParent(); return BB; } + case AMDGPU::ENDPGM_TRAP: { + const DebugLoc &DL = MI.getDebugLoc(); + if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) { + MI.setDesc(TII->get(AMDGPU::S_ENDPGM)); + MI.addOperand(MachineOperand::CreateImm(0)); + return BB; + } + + // The real S_ENDPGM must be a terminator, so split the block at the trap + // and branch to a new block that holds it. Deleting to the end of the + // block instead would break phis in successor blocks. 
+ + MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/); + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + MF->push_back(TrapBB); + BuildMI(*TrapBB, TrapBB->end(), DL, TII->get(AMDGPU::S_ENDPGM)) + .addImm(0); + BuildMI(*BB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addMBB(TrapBB); + + BB->addSuccessor(TrapBB); + MI.eraseFromParent(); + return SplitBB; + } default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } @@ -5572,7 +5596,7 @@ SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Chain = Op.getOperand(0); - return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); + return DAG.getNode(AMDGPUISD::ENDPGM_TRAP, SL, MVT::Other, Chain); } SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -95,6 +95,16 @@ //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// + +// Fallback trap: expands to s_endpgm, or a branch to an s_endpgm block. 
+def ENDPGM_TRAP : SPseudoInstSI< + (outs), (ins), + [(AMDGPUendpgm_trap)], + "ENDPGM_TRAP"> { + let hasSideEffects = 1; + let usesCustomInserter = 1; +} + def ATOMIC_FENCE : SPseudoInstSI< (outs), (ins i32imm:$ordering, i32imm:$scope), [(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))], Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GCN %s + +# Check trap legalization when the trap is not the last instruction of the function. + +--- +name: test_fallthrough_after_trap +body: | + ; GCN-LABEL: name: test_fallthrough_after_trap + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + %0:_(s8) = G_CONSTANT i8 0 + %1:_(p1) = G_CONSTANT i64 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) + + bb.1: + G_STORE %0, %1 :: (store 1, addrspace 1) + +... 
+ +--- +name: test_def_fallthrough_after_trap +body: | + ; GCN-LABEL: name: test_def_fallthrough_after_trap + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + %0:_(s8) = G_CONSTANT i8 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) + %1:_(p1) = G_CONSTANT i64 0 + + bb.1: + G_STORE %0, %1 :: (store 1, addrspace 1) + +... Index: llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -11,6 +11,17 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-enable-lower-module-lds=false %s 2> %t | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s ; RUN: FileCheck -check-prefix=ERR %s < %t +; Test there's no verifier error if a function directly uses LDS and +; we emit a trap. The s_endpgm needs to be emitted in a terminator +; position. 
+ +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,SDAG %s +; RUN: FileCheck -check-prefix=ERR %s < %t + +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: FileCheck -check-prefix=ERR %s < %t + + @lds = internal addrspace(3) global float poison, align 4 ; FIXME: The DAG should probably move the trap before the access. @@ -56,6 +67,30 @@ ; GFX9-GISEL-NEXT: ds_write_b32 v0, v0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: func_use_lds_global: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: ds_write_b32 v0, v0 +; SDAG-NEXT: s_cbranch_execnz .LBB0_2 +; SDAG-NEXT: ; %bb.1: +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: s_setpc_b64 s[30:31] +; SDAG-NEXT: .LBB0_2: +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: func_use_lds_global: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_cbranch_execnz .LBB0_2 +; GISEL-NEXT: ; %bb.1: +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: ds_write_b32 v0, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB0_2: +; GISEL-NEXT: s_endpgm store volatile float 0.0, ptr addrspace(3) @lds, align 4 ret void } @@ -91,9 +126,467 @@ ; GFX9-GISEL-NEXT: global_store_dword v[0:1], v0, off ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: func_use_lds_global_constexpr_cast: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_cbranch_execnz .LBB1_2 +; SDAG-NEXT: ; %bb.1: +; SDAG-NEXT: s_setpc_b64 s[30:31] +; SDAG-NEXT: .LBB1_2: +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: func_use_lds_global_constexpr_cast: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GISEL-NEXT: s_cbranch_execnz .LBB1_2 +; GISEL-NEXT: ; %bb.1: +; GISEL-NEXT: global_store_dword v[0:1], v0, off +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB1_2: +; GISEL-NEXT: s_endpgm store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4 ret void } + +; ERR: warning: :0:0: in function func_uses_lds_multi void (i1): local memory global used by non-kernel function +define void @func_uses_lds_multi(i1 %cond) { +; GFX8-SDAG-LABEL: func_uses_lds_multi: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX8-SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX8-SDAG-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GFX8-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[8:9] +; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_2 +; GFX8-SDAG-NEXT: ; %bb.1: ; %bb1 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 1 +; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-SDAG-NEXT: s_trap 2 +; GFX8-SDAG-NEXT: .LBB2_2: ; %Flow +; GFX8-SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_4 +; GFX8-SDAG-NEXT: ; %bb.3: ; %bb0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-SDAG-NEXT: s_trap 2 +; GFX8-SDAG-NEXT: .LBB2_4: ; %ret +; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 2 +; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-SDAG-NEXT: s_trap 2 +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: func_uses_lds_multi: +; GFX8-GISEL: ; %bb.0: ; %entry +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; 
GFX8-GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GFX8-GISEL-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GFX8-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[8:9] +; GFX8-GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8-GISEL-NEXT: ; %bb.1: ; %bb1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-GISEL-NEXT: s_trap 2 +; GFX8-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX8-GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8-GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX8-GISEL-NEXT: s_cbranch_execz .LBB2_4 +; GFX8-GISEL-NEXT: ; %bb.3: ; %bb0 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-GISEL-NEXT: s_trap 2 +; GFX8-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX8-GISEL-NEXT: .LBB2_4: ; %ret +; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 2 +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-GISEL-NEXT: s_trap 2 +; GFX8-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: func_uses_lds_multi: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; GFX9-SDAG-NEXT: s_cbranch_execz .LBB2_2 +; GFX9-SDAG-NEXT: ; %bb.1: ; %bb1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 +; GFX9-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX9-SDAG-NEXT: s_trap 2 +; GFX9-SDAG-NEXT: .LBB2_2: ; %Flow +; GFX9-SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-SDAG-NEXT: s_cbranch_execz .LBB2_4 +; GFX9-SDAG-NEXT: ; %bb.3: ; %bb0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX9-SDAG-NEXT: s_trap 2 +; GFX9-SDAG-NEXT: .LBB2_4: 
; %ret +; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2 +; GFX9-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX9-SDAG-NEXT: s_trap 2 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: func_uses_lds_multi: +; GFX9-GISEL: ; %bb.0: ; %entry +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; GFX9-GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9-GISEL-NEXT: ; %bb.1: ; %bb1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GFX9-GISEL-NEXT: s_trap 2 +; GFX9-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX9-GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9-GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-GISEL-NEXT: s_cbranch_execz .LBB2_4 +; GFX9-GISEL-NEXT: ; %bb.3: ; %bb0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-GISEL-NEXT: s_trap 2 +; GFX9-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX9-GISEL-NEXT: .LBB2_4: ; %ret +; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2 +; GFX9-GISEL-NEXT: s_trap 2 +; GFX9-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: func_uses_lds_multi: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1 +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; SDAG-NEXT: s_cbranch_execz .LBB2_2 +; SDAG-NEXT: ; %bb.1: ; %bb1 +; SDAG-NEXT: v_mov_b32_e32 v0, 1 +; SDAG-NEXT: ds_write_b32 v0, v0 +; SDAG-NEXT: s_cbranch_execnz .LBB2_6 +; SDAG-NEXT: .LBB2_2: ; %Flow +; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; SDAG-NEXT: s_cbranch_execz .LBB2_4 +; 
SDAG-NEXT: ; %bb.3: ; %bb0 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: ds_write_b32 v0, v0 +; SDAG-NEXT: s_cbranch_execnz .LBB2_6 +; SDAG-NEXT: .LBB2_4: ; %ret +; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; SDAG-NEXT: v_mov_b32_e32 v0, 2 +; SDAG-NEXT: ds_write_b32 v0, v0 +; SDAG-NEXT: s_cbranch_execnz .LBB2_6 +; SDAG-NEXT: ; %bb.5: ; %ret +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: s_setpc_b64 s[30:31] +; SDAG-NEXT: .LBB2_6: +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: func_uses_lds_multi: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; GISEL-NEXT: s_cbranch_execz .LBB2_3 +; GISEL-NEXT: ; %bb.1: ; %bb1 +; GISEL-NEXT: s_cbranch_execnz .LBB2_8 +; GISEL-NEXT: ; %bb.2: ; %bb1 +; GISEL-NEXT: v_mov_b32_e32 v0, 1 +; GISEL-NEXT: ds_write_b32 v0, v0 +; GISEL-NEXT: .LBB2_3: ; %Flow +; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GISEL-NEXT: s_cbranch_execz .LBB2_6 +; GISEL-NEXT: ; %bb.4: ; %bb0 +; GISEL-NEXT: s_cbranch_execnz .LBB2_8 +; GISEL-NEXT: ; %bb.5: ; %bb0 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: ds_write_b32 v0, v0 +; GISEL-NEXT: .LBB2_6: ; %ret +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_cbranch_execnz .LBB2_8 +; GISEL-NEXT: ; %bb.7: ; %ret +; GISEL-NEXT: v_mov_b32_e32 v0, 2 +; GISEL-NEXT: ds_write_b32 v0, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB2_8: +; GISEL-NEXT: s_endpgm +entry: + br i1 %cond, label %bb0, label %bb1 + +bb0: + store volatile i32 0, ptr addrspace(3) @lds, align 4 + br label %ret + +bb1: + store volatile i32 1, ptr addrspace(3) @lds, align 4 + br label %ret + +ret: + store volatile i32 2, ptr addrspace(3) @lds, align 4 + ret void +} + +; ERR: warning: :0:0: in function func_uses_lds_code_after void (ptr 
addrspace(1)): local memory global used by non-kernel function +define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) { +; GFX8-SDAG-LABEL: func_uses_lds_code_after: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX8-SDAG-NEXT: ds_write_b32 v0, v2 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 1 +; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-SDAG-NEXT: s_trap 2 +; GFX8-SDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: func_uses_lds_code_after: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-GISEL-NEXT: s_trap 2 +; GFX8-GISEL-NEXT: ds_write_b32 v0, v2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: func_uses_lds_code_after: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-SDAG-NEXT: ds_write_b32 v0, v2 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 1 +; GFX9-SDAG-NEXT: s_trap 2 +; GFX9-SDAG-NEXT: global_store_dword v[0:1], v2, off +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: func_uses_lds_code_after: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: s_trap 2 +; GFX9-GISEL-NEXT: ds_write_b32 v0, v2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GFX9-GISEL-NEXT: global_store_dword v[0:1], v2, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: 
func_uses_lds_code_after: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: ds_write_b32 v0, v2 +; SDAG-NEXT: s_cbranch_execnz .LBB3_2 +; SDAG-NEXT: ; %bb.1: +; SDAG-NEXT: v_mov_b32_e32 v2, 1 +; SDAG-NEXT: global_store_dword v[0:1], v2, off +; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_setpc_b64 s[30:31] +; SDAG-NEXT: .LBB3_2: +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: func_uses_lds_code_after: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_cbranch_execnz .LBB3_2 +; GISEL-NEXT: ; %bb.1: +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: ds_write_b32 v0, v2 +; GISEL-NEXT: v_mov_b32_e32 v2, 1 +; GISEL-NEXT: global_store_dword v[0:1], v2, off +; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB3_2: +; GISEL-NEXT: s_endpgm + store volatile i32 0, ptr addrspace(3) @lds, align 4 + store volatile i32 1, ptr addrspace(1) %ptr, align 4 + ret void +} + +; ERR: warning: :0:0: in function func_uses_lds_phi_after i32 (i1, ptr addrspace(1)): local memory global used by non-kernel function +define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { +; GFX8-SDAG-LABEL: func_uses_lds_phi_after: +; GFX8-SDAG: ; %bb.0: ; %entry +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, v0 +; GFX8-SDAG-NEXT: flat_load_dword v0, v[1:2] glc +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX8-SDAG-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX8-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX8-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX8-SDAG-NEXT: s_cbranch_execz .LBB4_2 +; GFX8-SDAG-NEXT: ; %bb.1: ; %use.bb +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-SDAG-NEXT: s_trap 2 +; GFX8-SDAG-NEXT: flat_load_dword v0, v[1:2] glc +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) +; 
GFX8-SDAG-NEXT: .LBB4_2: ; %ret +; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: func_uses_lds_phi_after: +; GFX8-GISEL: ; %bb.0: ; %entry +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, v0 +; GFX8-GISEL-NEXT: flat_load_dword v0, v[1:2] glc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX8-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX8-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8-GISEL-NEXT: ; %bb.1: ; %use.bb +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-GISEL-NEXT: s_trap 2 +; GFX8-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX8-GISEL-NEXT: flat_load_dword v0, v[1:2] glc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: .LBB4_2: ; %ret +; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: func_uses_lds_phi_after: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-SDAG-NEXT: global_load_dword v0, v[1:2], off glc +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-SDAG-NEXT: s_cbranch_execz .LBB4_2 +; GFX9-SDAG-NEXT: ; %bb.1: ; %use.bb +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: ds_write_b32 v0, v0 +; GFX9-SDAG-NEXT: s_trap 2 +; GFX9-SDAG-NEXT: global_load_dword v0, v[1:2], off glc +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: .LBB4_2: ; %ret +; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX9-GISEL-LABEL: func_uses_lds_phi_after: +; GFX9-GISEL: ; %bb.0: ; %entry +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, v0 +; GFX9-GISEL-NEXT: global_load_dword v0, v[1:2], off glc +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9-GISEL-NEXT: ; %bb.1: ; %use.bb +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-GISEL-NEXT: s_trap 2 +; GFX9-GISEL-NEXT: ds_write_b32 v0, v0 +; GFX9-GISEL-NEXT: global_load_dword v0, v[1:2], off glc +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: .LBB4_2: ; %ret +; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: func_uses_lds_phi_after: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v3, v0 +; SDAG-NEXT: global_load_dword v0, v[1:2], off glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v3, 1, v3 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc +; SDAG-NEXT: s_cbranch_execz .LBB4_3 +; SDAG-NEXT: ; %bb.1: ; %use.bb +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: ds_write_b32 v0, v0 +; SDAG-NEXT: s_cbranch_execnz .LBB4_4 +; SDAG-NEXT: ; %bb.2: ; %use.bb +; SDAG-NEXT: global_load_dword v0, v[1:2], off glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: .LBB4_3: ; %ret +; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_setpc_b64 s[30:31] +; SDAG-NEXT: .LBB4_4: +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: func_uses_lds_phi_after: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v3, v0 +; GISEL-NEXT: global_load_dword v0, v[1:2], off glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GISEL-NEXT: v_and_b32_e32 v3, 1, v3 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_cbranch_execz .LBB4_3 +; GISEL-NEXT: ; %bb.1: ; %use.bb +; GISEL-NEXT: s_cbranch_execnz .LBB4_4 +; GISEL-NEXT: ; %bb.2: ; %use.bb +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: ds_write_b32 v0, v0 +; GISEL-NEXT: global_load_dword v0, v[1:2], off glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: .LBB4_3: ; %ret +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB4_4: +; GISEL-NEXT: s_endpgm +entry: + %entry.load = load volatile i32, ptr addrspace(1) %ptr + br i1 %cond, label %use.bb, label %ret + +use.bb: + store volatile i32 0, ptr addrspace(3) @lds, align 4 + %use.bb.load = load volatile i32, ptr addrspace(1) %ptr + br label %ret + +ret: + %phi = phi i32 [ %entry.load, %entry ], [ %use.bb.load, %use.bb ] + ret i32 %phi +} + ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} ; GFX8: {{.*}} ; GFX9: {{.*}} Index: llvm/test/CodeGen/AMDGPU/trap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap.ll +++ llvm/test/CodeGen/AMDGPU/trap.ll @@ -121,6 +121,29 @@ ret void } +; GCN-LABEL: {{^}}non_entry_trap_no_unreachable: +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 + +; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap +; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-NEXT: s_trap 2 +define amdgpu_kernel void @non_entry_trap_no_unreachable(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +entry: + %tmp29 = load volatile i32, ptr addrspace(1) %arg0 + %cmp = icmp eq i32 %tmp29, -1 + br i1 %cmp, label %ret, label %trap + +trap: + call void @llvm.trap() + store volatile i32 1234, ptr addrspace(3) null + br label %ret + +ret: + store volatile i32 3, ptr addrspace(1) %arg0 + ret void +} + attributes #0 = { nounwind noreturn } attributes #1 = { nounwind }