Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -117,6 +117,7 @@
   setOperationAction(ISD::SETCC, MVT::i1, Promote);
   setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
   setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
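+  // SETCC on i1 is promoted; record i32 as the type it promotes to.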
+  AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
 
   setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
   setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
@@ -243,9 +244,6 @@
     setOperationAction(ISD::UMIN, MVT::i16, Legal);
     setOperationAction(ISD::UMAX, MVT::i16, Legal);
 
-    setOperationAction(ISD::SETCC, MVT::i16, Promote);
-    AddPromotedToType(ISD::SETCC, MVT::i16, MVT::i32);
-
     setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
     AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
 
Index: lib/Target/AMDGPU/VOPCInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPCInstructions.td
+++ lib/Target/AMDGPU/VOPCInstructions.td
@@ -147,6 +147,7 @@
 def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
 def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>;
 def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>;
+def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>;
 def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>;
 def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>;
 
@@ -159,6 +160,9 @@
 multiclass VOPC_F64 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
   VOPC_Pseudos <opName, VOPC_I1_F64_F64, cond, revOp, 0>;
 
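+// VOPC_I16 mirrors VOPC_I32 below: each defm expands through VOPC_Pseudos
+// into _e32 and _e64 compare variants; only the operand profile
+// (VOPC_I1_I16_I16) differs.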
+multiclass VOPC_I16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+  VOPC_Pseudos <opName, VOPC_I1_I16_I16, cond, revOp, 0>;
+
 multiclass VOPC_I32 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
   VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
 
@@ -325,7 +329,7 @@
 
 } // End SubtargetPredicate = isSICI
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = Has16BitInsts in {
 
 defm V_CMP_F_F16 : VOPC_F16 <"v_cmp_f_f16">;
 defm V_CMP_LT_F16 : VOPC_F16 <"v_cmp_lt_f16", COND_OLT, "v_cmp_gt_f16">;
@@ -361,7 +365,25 @@
 defm V_CMPX_NLT_F16 : VOPCX_F16 <"v_cmpx_nlt_f16">;
 defm V_CMPX_TRU_F16 : VOPCX_F16 <"v_cmpx_tru_f16">;
 
-} // End SubtargetPredicate = isVI
+defm V_CMP_F_I16 : VOPC_I16 <"v_cmp_f_i16">;
+defm V_CMP_LT_I16 : VOPC_I16 <"v_cmp_lt_i16", COND_SLT, "v_cmp_gt_i16">;
+defm V_CMP_EQ_I16 : VOPC_I16 <"v_cmp_eq_i16">;
+defm V_CMP_LE_I16 : VOPC_I16 <"v_cmp_le_i16", COND_SLE, "v_cmp_ge_i16">;
+defm V_CMP_GT_I16 : VOPC_I16 <"v_cmp_gt_i16", COND_SGT>;
+defm V_CMP_NE_I16 : VOPC_I16 <"v_cmp_ne_i16">;
+defm V_CMP_GE_I16 : VOPC_I16 <"v_cmp_ge_i16", COND_SGE>;
+defm V_CMP_T_I16 : VOPC_I16 <"v_cmp_t_i16">;
+
+defm V_CMP_F_U16 : VOPC_I16 <"v_cmp_f_u16">;
+defm V_CMP_LT_U16 : VOPC_I16 <"v_cmp_lt_u16", COND_ULT, "v_cmp_gt_u16">;
+defm V_CMP_EQ_U16 : VOPC_I16 <"v_cmp_eq_u16", COND_EQ>;
+defm V_CMP_LE_U16 : VOPC_I16 <"v_cmp_le_u16", COND_ULE, "v_cmp_ge_u16">;
+defm V_CMP_GT_U16 : VOPC_I16 <"v_cmp_gt_u16", COND_UGT>;
+defm V_CMP_NE_U16 : VOPC_I16 <"v_cmp_ne_u16", COND_NE>;
+defm V_CMP_GE_U16 : VOPC_I16 <"v_cmp_ge_u16", COND_UGE>;
+defm V_CMP_T_U16 : VOPC_I16 <"v_cmp_t_u16">;
+
+} // End SubtargetPredicate = Has16BitInsts
 
 defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
 defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
@@ -973,6 +995,24 @@
 defm V_CMPX_NLT_F64 : VOPC_Real_vi <0x7e>;
 defm V_CMPX_TRU_F64 : VOPC_Real_vi <0x7f>;
 
+defm V_CMP_F_I16 : VOPC_Real_vi <0xa0>;
+defm V_CMP_LT_I16 : VOPC_Real_vi <0xa1>;
+defm V_CMP_EQ_I16 : VOPC_Real_vi <0xa2>;
+defm V_CMP_LE_I16 : VOPC_Real_vi <0xa3>;
+defm V_CMP_GT_I16 : VOPC_Real_vi <0xa4>;
+defm V_CMP_NE_I16 : VOPC_Real_vi <0xa5>;
+defm V_CMP_GE_I16 : VOPC_Real_vi <0xa6>;
+defm V_CMP_T_I16 : VOPC_Real_vi <0xa7>;
+
+defm V_CMP_F_U16 : VOPC_Real_vi <0xa8>;
+defm V_CMP_LT_U16 : VOPC_Real_vi <0xa9>;
+defm V_CMP_EQ_U16 : VOPC_Real_vi <0xaa>;
+defm V_CMP_LE_U16 : VOPC_Real_vi <0xab>;
+defm V_CMP_GT_U16 : VOPC_Real_vi <0xac>;
+defm V_CMP_NE_U16 : VOPC_Real_vi <0xad>;
+defm V_CMP_GE_U16 : VOPC_Real_vi <0xae>;
+defm V_CMP_T_U16 : VOPC_Real_vi <0xaf>;
+
 defm V_CMP_F_I32 : VOPC_Real_vi <0xc0>;
 defm V_CMP_LT_I32 : VOPC_Real_vi <0xc1>;
 defm V_CMP_EQ_I32 : VOPC_Real_vi <0xc2>;
Index: test/CodeGen/AMDGPU/fcmp.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcmp.f16.ll
+++ test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -24,6 +24,34 @@
   ret void
 }
 
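+; fabs should fold into the compare as the |...| source modifier instead
+; of being emitted as a separate instruction. SI has no f16 compares, so
+; both operands are extended to f32 first.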
+; GCN-LABEL: {{^}}fcmp_f16_lt_abs:
+; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
+; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
+
+; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
+; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
+
+; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
+; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|
+
+; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
+; GCN: buffer_store_dword v[[R_I32]]
+; GCN: s_endpgm
+define void @fcmp_f16_lt_abs(
+    i32 addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %a.abs = call half @llvm.fabs.f16(half %a.val)
+  %b.abs = call half @llvm.fabs.f16(half %b.val)
+  %r.val = fcmp olt half %a.abs, %b.abs
+  %r.val.sext = sext i1 %r.val to i32
+  store i32 %r.val.sext, i32 addrspace(1)* %r
+  ret void
+}
+
 ; GCN-LABEL: {{^}}fcmp_f16_eq
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
@@ -742,3 +770,8 @@
   store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
   ret void
 }
+
+declare half @llvm.fabs.f16(half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/icmp.i16.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/icmp.i16.ll
@@ -0,0 +1,353 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+;;;==========================================================================;;;
+;; 16-bit integer comparisons
+;;;==========================================================================;;;
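+
+; On VI these select directly to the new 16-bit VOPC compares; SI has no
+; 16-bit instructions, so the operands are first legalized to 32 bits and
+; the matching 32-bit compare is used.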
+
+; GCN-LABEL: {{^}}i16_eq:
+; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_eq(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp eq i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ne:
+; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ne(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp ne i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ugt:
+; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ugt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp ugt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_uge:
+; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_uge(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp uge i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ult:
+; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ult(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp ult i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ule:
+; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ule(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp ule i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sgt:
+; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sgt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp sgt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sge:
+; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sge(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp sge i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_slt:
+; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_slt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp slt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sle:
+; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sle(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %b = load i16, i16 addrspace(1)* %b.gep
+  %tmp0 = icmp sle i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; These should be commuted to reduce code size
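+; (the e32 compare encoding accepts an SGPR only in src0, so when the
+; scalar operand is on the right the compare is commuted and the condition
+; reversed, e.g. ugt becomes lt with the operands swapped)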
+; GCN-LABEL: {{^}}i16_eq_v_s:
+; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_eq_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp eq i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ne_v_s:
+; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ne_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp ne i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ugt_v_s:
+; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ugt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp ugt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_uge_v_s:
+; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_uge_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp uge i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ult_v_s:
+; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ult_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp ult i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_ule_v_s:
+; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_ule_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp ule i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sgt_v_s:
+; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sgt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp sgt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sge_v_s:
+; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sge_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp sge i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_slt_v_s:
+; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_slt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp slt i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}i16_sle_v_s:
+; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
+define void @i16_sle_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
+  %a = load i16, i16 addrspace(1)* %a.gep
+  %tmp0 = icmp sle i16 %a, %b
+  %tmp1 = sext i1 %tmp0 to i32
+  store i32 %tmp1, i32 addrspace(1)* %out.gep
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/setcc-opt.ll
===================================================================
--- test/CodeGen/AMDGPU/setcc-opt.ll
+++ test/CodeGen/AMDGPU/setcc-opt.ll
@@ -149,9 +149,13 @@
 ; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
 ; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
-; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
 ; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
+; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
+; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]
+
+; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
+; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]
+
 ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
 ; GCN: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm