Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -566,6 +566,31 @@ case Intrinsic::minnum: case Intrinsic::amdgcn_cvt_pkrtz: return selectImpl(I, CoverageInfo); + case Intrinsic::amdgcn_if_break: { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // FIXME: Manually selecting to avoid dealing with the SReg_1 trick + // SelectionDAG uses for wave32 vs wave64. + BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) + .add(I.getOperand(0)) + .add(I.getOperand(2)) + .add(I.getOperand(3)); + + Register DstReg = I.getOperand(0).getReg(); + Register Src0Reg = I.getOperand(2).getReg(); + Register Src1Reg = I.getOperand(3).getReg(); + + I.eraseFromParent(); + + for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { + if (!MRI.getRegClassOrNull(Reg)) + MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); + } + + return true; + } default: return selectImpl(I, CoverageInfo); } Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1985,6 +1985,13 @@ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); break; } + case Intrinsic::amdgcn_if_break: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + break; + } } break; } Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll =================================================================== --- /dev/null +++ 
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { +; GCN-LABEL: test_wave32: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 +; GCN-NEXT: s_load_dword s1, s[4:5], 0x24 +; GCN-NEXT: ; implicit-def: $vcc_hi +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s0, 0 +; GCN-NEXT: s_cselect_b32 s0, -1, 0 +; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %arg0, 0 + %break = call i32 @llvm.amdgcn.if.break.i32(i1 %cond, i32 %saved) + store volatile i32 %break, i32 addrspace(1)* undef + ret void +} + +declare i32 @llvm.amdgcn.if.break.i32(i1, i32) Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) { +; GCN-LABEL: test_wave64: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s2, s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cselect_b32 s2, -1, 0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; 
GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GCN-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %arg0, 0 + %break = call i64 @llvm.amdgcn.if.break.i64(i1 %cond, i64 %saved) + store volatile i64 %break, i64 addrspace(1)* undef + ret void +} + +declare i64 @llvm.amdgcn.if.break.i64(i1, i64)