Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -389,6 +389,20 @@
   GCCBuiltin<"__builtin_amdgcn_lerp">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
+// llvm.amdgcn.icmp
+// Operands are (src0, src1, predicate); the predicate is an i32 that the
+// lowering interprets as an ICmpInst::Predicate value. Returns an i64.
+def int_amdgcn_icmp :
+  GCCBuiltin<"__builtin_amdgcn_icmp">,
+  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+// llvm.amdgcn.fcmp
+// Same shape as llvm.amdgcn.icmp with floating-point sources; the i32
+// predicate is interpreted as an FCmpInst::Predicate value.
+def int_amdgcn_fcmp :
+  GCCBuiltin<"__builtin_amdgcn_fcmp">,
+  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -228,6 +228,9 @@
   DWORDADDR,
   FRACT,
   CLAMP,
+  // SETCC - compare produced by lowering llvm.amdgcn.icmp/fcmp; its type
+  // profile (AMDGPUSetCCOp) fixes the result type to i64.
+  SETCC,
 
   // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
   // Denormals handled on some parts.
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2804,6 +2804,7 @@
   NODE_NAME_CASE(RETURN)
   NODE_NAME_CASE(DWORDADDR)
   NODE_NAME_CASE(FRACT)
   NODE_NAME_CASE(CLAMP)
+  NODE_NAME_CASE(SETCC)
   NODE_NAME_CASE(COS_HW)
   NODE_NAME_CASE(SIN_HW)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -137,6 +137,14 @@
 
 // out = (src1 > src0) ? 1 : 0
 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
+
+// Type profile for AMDGPUISD::SETCC: an i64 result, two operands of the same
+// type, and a condition code.
+def AMDGPUSetCCOp : SDTypeProfile<1, 3, [
+  SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+]>;
+
+def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
 
 def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
   SDTIntToFPOp, []>;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -31,6 +31,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 
@@ -1651,6 +1652,39 @@
   // TODO: Should this propagate fast-math-flags?
 
   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_icmp: {
+    // The predicate must be a constant in the ICmpInst range; anything else
+    // cannot be mapped to an ISD::CondCode, so fold the call to undef.
+    const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+    if (!CD)
+      return DAG.getUNDEF(VT);
+
+    const int64_t Pred = CD->getSExtValue();
+    if (Pred < ICmpInst::FIRST_ICMP_PREDICATE ||
+        Pred > ICmpInst::LAST_ICMP_PREDICATE)
+      return DAG.getUNDEF(VT);
+
+    ISD::CondCode CCOpcode =
+        getICmpCondCode(static_cast<ICmpInst::Predicate>(Pred));
+    return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
+                       Op.getOperand(2), DAG.getCondCode(CCOpcode));
+  }
+  case Intrinsic::amdgcn_fcmp: {
+    // Floating-point compares use the FCmpInst predicate numbering.
+    const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+    if (!CD)
+      return DAG.getUNDEF(VT);
+
+    const int64_t Pred = CD->getSExtValue();
+    if (Pred < FCmpInst::FIRST_FCMP_PREDICATE ||
+        Pred > FCmpInst::LAST_FCMP_PREDICATE)
+      return DAG.getUNDEF(VT);
+
+    ISD::CondCode CCOpcode =
+        getFCmpCondCode(static_cast<FCmpInst::Predicate>(Pred));
+    return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
+                       Op.getOperand(2), DAG.getCondCode(CCOpcode));
+  }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
     if (!Subtarget->isAmdHsaOS()) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2357,6 +2357,24 @@
   (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
 >;
 
+
+//===----------------------------------------------------------------------===//
+// V_ICMPIntrinsic Pattern.
+//===----------------------------------------------------------------------===//
+
+// Selects an AMDGPUsetcc of two i32 operands with condition `cond` to `inst`.
+multiclass CMP_Pattern <PatLeaf cond, Instruction inst> {
+def : Pat <
+  (AMDGPUsetcc i32:$src0, i32:$src1, cond),
+  (inst i32:$src0, i32:$src1)
+>;
+}
+
+// NOTE(review): the template arguments below were lost from the posted patch
+// text and are reconstructed; confirm against the original revision.
+defm : CMP_Pattern <COND_EQ, V_CMP_EQ_I32_e64>;
+defm : CMP_Pattern <COND_NE, V_CMP_NE_I32_e64>;
+
 //===----------------------------------------------------------------------===//
 // SMRD Patterns
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ne.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ne.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_icmp_ne:
+; GCN: v_cmp_ne_i32_e64
+define void @v_icmp_ne(i64 addrspace(1)* %out, i32 %src) nounwind {
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) #0
+  store i64 %result, i64 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone convergent }