Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -106,6 +106,7 @@
 
   bool selectInterpP1F16(MachineInstr &MI) const;
   bool selectDivScale(MachineInstr &MI) const;
+  bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -889,6 +889,8 @@
     return constrainCopyLikeIntrin(I, AMDGPU::WWM);
   case Intrinsic::amdgcn_div_scale:
     return selectDivScale(I);
+  case Intrinsic::amdgcn_icmp:
+    return selectIntrinsicIcmp(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1009,6 +1011,45 @@
   return Ret;
 }
 
+/// Manually select llvm.amdgcn.icmp as the V_CMP opcode that getV_CMPOpcode
+/// returns for the predicate immediate (operand 4) and the size of the first
+/// compared value (operand 2).
+///
+/// Returns false without modifying \p I when isVCC(Dst) holds, when the
+/// result width differs from the wavefront size, or when getV_CMPOpcode
+/// reports no opcode (-1). Otherwise \p I is erased and the result of
+/// constraining the new compare's operands is returned.
+bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
+  Register Dst = I.getOperand(0).getReg();
+  if (isVCC(Dst, *MRI))
+    return false;
+
+  // The result must be exactly as wide as the wavefront.
+  if (MRI->getType(Dst).getSizeInBits() != STI.getWavefrontSize())
+    return false;
+
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+  // The predicate is carried as an immediate in operand 4.
+  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+
+  int Opcode = getV_CMPOpcode(Pred, Size);
+  if (Opcode == -1)
+    return false;
+
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
+                           .add(I.getOperand(2))
+                           .add(I.getOperand(3));
+  // The result register is constrained to the class from TRI.getBoolRC().
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
+                               *MRI);
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually
selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
+; GCN-LABEL: test_intr_icmp_eq_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2
+; GCN-NEXT: v_mov_b32_e32 v3, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GCN-NEXT: s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32) ; last operand 32 is the predicate; the checks above expect v_cmp_eq_u32
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
+; GCN-LABEL: test_intr_icmp_ne_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: global_store_dword v[0:1], v2, off
+; GCN-NEXT: s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33) ; last operand 33 is the predicate; the checks above expect v_cmp_ne_u32
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
+declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
+attributes #0 = { "target-features"="+wavefrontsize64" } ; used by the i64-result test, whose compare writes the s[0:1] pair
+attributes #1 = { "target-features"="+wavefrontsize32" } ; used by the i32-result test, whose compare writes the single s0