# Patch summary (free text ahead of the first "diff --git" header).
#
# Purpose: allow the `clamp` modifier on the AMDGPU 16-bit integer add/sub
# VOP2 instructions.
#
# * SIInstrInfo.td: adds VOP_I16_I16_I16_ARITH, a VOPProfile with the same
#   [i16, i16, i16, untyped] type list as VOP_I16_I16_I16, but passing 0 as the
#   second template argument and 1 as the third (annotated /*EnableClamp=*/).
# * VOP2Instructions.td: V_ADD_U16, V_SUB_U16 and V_SUBREV_U16 switch from
#   VOP_I16_I16_I16 to the new profile. V_MUL_LO_U16 stays on VOP_I16_I16_I16.
# * GlobalISel/inst-select-add.s16.mir: the expected V_ADD_U16_e64 and
#   V_SUB_U16_e64 instructions gain one extra `0` immediate operand just before
#   `implicit $exec` (presumably the new clamp operand -- TODO confirm).
# * MC/AMDGPU/gfx10_asm_all.s and MC/Disassembler/AMDGPU/gfx10_dasm_all.txt:
#   new v_add_nc_u16 / v_sub_nc_u16 cases with `clamp`. Compared with the
#   neighbouring non-clamp case of the same instruction, only the second
#   encoding byte differs (0x80 instead of 0x00).
# * MC/AMDGPU/vop2.s and MC/Disassembler/AMDGPU/vop2_vi.txt: new
#   v_add_u16 / v_sub_u16 / v_subrev_u16 cases with `clamp`. Under the VI prefix
#   they are expected to print as the _e64 form with second encoding byte 0x80;
#   under the NOSICI prefix they are expected to fail with
#   "invalid operand for instruction".
#
# NOTE(review): the patch text below looks whitespace-collapsed -- line breaks,
# blank context lines and leading indentation appear to have been folded into
# single spaces. It is kept byte-for-byte as received; it probably has to be
# regenerated from the original commit before it can be applied -- confirm.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2266,6 +2266,7 @@ def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; +def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>; def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -626,9 +626,9 @@ defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } // End FPDPRounding = 1 -defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>; -defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>; -defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; +defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; +defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; +defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -19,14 +19,14 @@ ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -51,14 +51,14 @@ ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ADD_U16_e64_]], 0, 16, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -84,13 +84,13 @@ ; GFX6-LABEL: name: add_s16_neg_inline_const_64 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 
implicit $exec + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64 ; GFX10: liveins: $vgpr0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -113,13 +113,13 @@ ; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX10: liveins: $vgpr0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_SUB_U16_e64_]], 0, 16, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -58995,6 +58995,9 @@ v_add_nc_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x03,0xd7,0x01,0xef,0x01,0x00] +v_add_nc_u16 v5, v1, -4.0 clamp +// GFX10: encoding: [0x05,0x80,0x03,0xd7,0x01,0xef,0x01,0x00] + v_sub_nc_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] @@ -59073,6 +59076,9 @@ 
v_sub_nc_u16 v5, v1, -4.0 // GFX10: encoding: [0x05,0x00,0x04,0xd7,0x01,0xef,0x01,0x00] +v_sub_nc_u16 v5, v1, -4.0 clamp +// GFX10: encoding: [0x05,0x80,0x04,0xd7,0x01,0xef,0x01,0x00] + v_mul_lo_u16 v5, v1, v2 // GFX10: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -435,16 +435,31 @@ // VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] v_add_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_add_u16 v1, v2, v3 clamp +// VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00] +v_add_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_sub_u16_e32 v1, v2, v3 // VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] v_sub_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_sub_u16 v1, v2, v3 clamp +// VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00] +v_sub_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_subrev_u16_e32 v1, v2, v3 // VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] v_subrev_u16_e32 v1, v2, v3 +// NOSICI: error: invalid operand for instruction +// NOSICI: v_subrev_u16 v1, v2, v3 clamp +// VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00] +v_subrev_u16 v1, v2, v3 clamp + // NOSICI: error: instruction not supported on this GPU // NOSICI: v_mul_lo_u16_e32 v1, v2, v3 // VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -21401,6 +21401,9 @@ # GFX10: v_add_nc_u16_e64 v5, 
vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00 + # GFX10: v_add_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4b] 0x01,0x05,0xfe,0x4b @@ -95808,6 +95811,9 @@ # GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00 +# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00 + # GFX10: v_sub_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4d] 0x01,0x05,0xfe,0x4d diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt --- a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt @@ -222,12 +222,21 @@ # VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] 0x02 0x07 0x02 0x4c +# VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x26 0xd1 0x02 0x07 0x02 0x00 + # VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] 0x02 0x07 0x02 0x4e +# VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x27 0xd1 0x02 0x07 0x02 0x00 + # VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] 0x02 0x07 0x02 0x50 +# VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00] +0x01 0x80 0x28 0xd1 0x02 0x07 0x02 0x00 + # VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] 0x02 0x07 0x02 0x52