Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -655,6 +655,20 @@
     I.eraseFromParent();
     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
   }
+  case Intrinsic::amdgcn_end_cf: {
+    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+    // SelectionDAG uses for wave32 vs wave64.
+    BuildMI(*BB, &I, I.getDebugLoc(),
+            TII.get(AMDGPU::SI_END_CF))
+      .add(I.getOperand(1));
+
+    Register Reg = I.getOperand(1).getReg();
+    I.eraseFromParent();
+
+    if (!MRI.getRegClassOrNull(Reg))
+      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+    return true;
+  }
   default:
     return selectImpl(I, CoverageInfo);
   }
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2077,6 +2077,11 @@
       OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
       break;
     }
+    case Intrinsic::amdgcn_end_cf: {
+      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
     }
     break;
   }
Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
+; GCN-LABEL: test_wave32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s1, s[4:5], 0x0
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GCN-NEXT:    ; implicit-def: $vcc_hi
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s1, 0
+; GCN-NEXT:    s_cbranch_scc0 BB0_2
+; GCN-NEXT:  ; %bb.1: ; %mid
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %mid, label %bb
+
+mid:
+  store volatile i32 0, i32 addrspace(1)* undef
+  br label %bb
+
+bb:
+  call void @llvm.amdgcn.end.cf.i32(i32 %saved)
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.amdgcn.end.cf.i32(i32 %val)
Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
+; GCN-LABEL: test_wave64:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s2, 0
+; GCN-NEXT:    s_cbranch_scc0 BB0_2
+; GCN-NEXT:  ; %bb.1: ; %mid
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %mid, label %bb
+
+mid:
+  store volatile i32 0, i32 addrspace(1)* undef
+  br label %bb
+
+bb:
+  call void @llvm.amdgcn.end.cf.i64(i64 %saved)
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.amdgcn.end.cf.i64(i64 %val)