AMDGPU: accept the output modifier (omod: mul:N / div:N) on the VOP3 (_e64)
forms of selected VOP1 instructions that take a floating-point source and
produce an integer result. These instructions get dedicated profiles with
HasOMod = 1; the VOP1-with-modifiers operand list only carries $omod when
HasOMod is set. MC tests cover the new encodings, and the assembler error
test for "v_cvt_u32_f32_e64 ... div:2" is removed.

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1623,8 +1623,11 @@
     !if (!eq(NumSrcArgs, 1),
       !if (HasModifiers,
         // VOP1 with modifiers
-        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-             clampmod0:$clamp, omod0:$omod)
+        !if(HasOMod,
+          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+               clampmod0:$clamp, omod0:$omod),
+          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+               clampmod0:$clamp))
       /* else */,
         // VOP1 without modifiers
         !if (HasClamp,
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -144,6 +144,16 @@
 def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
 def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
 
+// OMod clears exceptions when set in these instructions
+class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
+  VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+
+  let HasOMod = 1;
+}
+def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
+def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
+def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Instructions
 //===----------------------------------------------------------------------===//
@@ -188,7 +198,7 @@
 }
 
 let SchedRW = [WriteDoubleCvt] in {
-defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;
 
 let mayRaiseFPException = 0 in {
 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
@@ -196,7 +206,7 @@
 
 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
 defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
-defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;
 
 let mayRaiseFPException = 0 in {
 defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
@@ -213,8 +223,8 @@
 defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
 }
 
-defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
-defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
+defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
 let FPDPRounding = 1 in {
 defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
 } // End FPDPRounding = 1
@@ -222,8 +232,8 @@
 defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
 
 let ReadsModeReg = 0, mayRaiseFPException = 0 in {
-defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
-defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32_SPECIAL_OMOD, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32_SPECIAL_OMOD, cvt_flr_i32_f32>;
 defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
 } // End ReadsModeReg = 0, mayRaiseFPException = 0
 } // End SchedRW = [WriteFloatCvt]
@@ -268,7 +278,7 @@
 defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
 
 let SchedRW = [WriteDoubleAdd] in {
-defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
 defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
 let FPDPRounding = 1 in {
 defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
@@ -381,8 +391,8 @@
 defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
 defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
 } // End FPDPRounding = 1
-defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
-defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
+defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
+defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
 let TRANS = 1, SchedRW = [WriteTrans32] in {
 defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
 defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -393,7 +403,7 @@
 defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
 } // End TRANS = 1, SchedRW = [WriteTrans32]
 defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
-defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
 defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
 defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
 defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -437,8 +447,8 @@
   defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
 
   let mayRaiseFPException = 0 in {
-    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
-    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
+    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
   } // End mayRaiseFPException = 0
 } // End SubtargetPredicate = isGFX9Plus
 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -367,6 +367,12 @@
 v_cvt_i32_f64_e64 v5, v[1:2] clamp
 // GFX10: encoding: [0x05,0x80,0x83,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_i32_f64_e64 v5, s[4:5] mul:2
+// GFX10: encoding: [0x05,0x00,0x83,0xd5,0x04,0x00,0x00,0x08]
+
+v_cvt_i32_f64_e64 v5, v[1:2] clamp div:2
+// GFX10: encoding: [0x05,0x80,0x83,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_f64_i32_e32 v[5:6], v1
 // GFX10: encoding: [0x01,0x09,0x0a,0x7e]
 
@@ -1126,6 +1132,12 @@
 v_cvt_u32_f32_e64 v5, v1 clamp
 // GFX10: encoding: [0x05,0x80,0x87,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_u32_f32_e64 v5, s1 mul:2
+// GFX10: encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x08]
+
+v_cvt_u32_f32_e64 v5, v1 clamp div:2
+// GFX10: encoding: [0x05,0x80,0x87,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_u32_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x0e,0x0a,0x7e,0x01,0x06,0x06,0x00]
 
@@ -1393,6 +1405,12 @@
 v_cvt_i32_f32_e64 v5, v1 clamp
 // GFX10: encoding: [0x05,0x80,0x88,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_i32_f32_e64 v5, v1 mul:2
+// GFX10: encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x08]
+
+v_cvt_i32_f32_e64 v5, v1 clamp div:2
+// GFX10: encoding: [0x05,0x80,0x88,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_i32_f32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x10,0x0a,0x7e,0x01,0x06,0x06,0x00]
 
@@ -4330,6 +4348,12 @@
 v_cvt_u32_f64_e64 v5, v[1:2] clamp
 // GFX10: encoding: [0x05,0x80,0x95,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_u32_f64_e64 v5, s[4:5] mul:2
+// GFX10: encoding: [0x05,0x00,0x95,0xd5,0x04,0x00,0x00,0x08]
+
+v_cvt_u32_f64_e64 v5, v[1:2] clamp div:2
+// GFX10: encoding: [0x05,0x80,0x95,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_f64_u32 v[5:6], v1
 // GFX10: encoding: [0x01,0x2d,0x0a,0x7e]
 
@@ -11719,6 +11743,12 @@
 v_cvt_u16_f16_e64 v5, v1 clamp
 // GFX10: encoding: [0x05,0x80,0xd2,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_u16_f16_e64 v5, s1 mul:2
+// GFX10: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x08]
+
+v_cvt_u16_f16_e64 v5, v1 clamp div:2
+// GFX10: encoding: [0x05,0x80,0xd2,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0xa4,0x0a,0x7e,0x01,0x06,0x06,0x00]
 
@@ -11974,6 +12004,12 @@
 v_cvt_i16_f16_e64 v5, v1 clamp
 // GFX10: encoding: [0x05,0x80,0xd3,0xd5,0x01,0x01,0x00,0x00]
 
+v_cvt_i16_f16_e64 v5, v1 mul:2
+// GFX10: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x08]
+
+v_cvt_i16_f16_e64 v5, v1 clamp div:2
+// GFX10: encoding: [0x05,0x80,0xd3,0xd5,0x01,0x01,0x00,0x18]
+
 v_cvt_i16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0xa6,0x0a,0x7e,0x01,0x06,0x06,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
@@ -258,6 +258,12 @@
 v_cvt_i32_f64_e64 v5, v[1:2] clamp
 // CHECK: [0x05,0x80,0x43,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_i32_f64_e64 v5, s[4:5] mul:2
+// CHECK: [0x05,0x00,0x43,0xd1,0x04,0x00,0x00,0x08]
+
+v_cvt_i32_f64_e64 v5, v[1:2] clamp div:2
+// CHECK: [0x05,0x80,0x43,0xd1,0x01,0x01,0x00,0x18]
+
 v_cvt_f64_i32_e64 v[5:6], v1
 // CHECK: [0x05,0x00,0x44,0xd1,0x01,0x01,0x00,0x00]
 
@@ -555,6 +561,12 @@
 v_cvt_u32_f32_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x47,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_u32_f32_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x47,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_u32_f32_e64 v5, v1 clamp div:2
+// CHECK: [0x05,0x80,0x47,0xd1,0x01,0x01,0x00,0x18]
+
 v_cvt_i32_f32_e64 v5, v1
 // CHECK: [0x05,0x00,0x48,0xd1,0x01,0x01,0x00,0x00]
 
@@ -627,6 +639,12 @@
 v_cvt_i32_f32_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x48,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_i32_f32_e64 v5, v1 mul:2
+// CHECK: [0x05,0x00,0x48,0xd1,0x01,0x01,0x00,0x08]
+
+v_cvt_i32_f32_e64 v5, v1 clamp div:2
+// CHECK: [0x05,0x80,0x48,0xd1,0x01,0x01,0x00,0x18]
+
 v_cvt_f16_f32_e64 v5, v1
 // CHECK: [0x05,0x00,0x4a,0xd1,0x01,0x01,0x00,0x00]
 
@@ -858,6 +876,9 @@
 v_cvt_rpi_i32_f32_e64 v5, |v1|
 // CHECK: [0x05,0x01,0x4c,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_rpi_i32_f32_e64 v5, v255 mul:2
+// CHECK: [0x05,0x00,0x4c,0xd1,0xff,0x01,0x00,0x08]
+
 v_cvt_flr_i32_f32_e64 v5, v1
 // CHECK: [0x05,0x00,0x4d,0xd1,0x01,0x01,0x00,0x00]
 
@@ -927,6 +948,9 @@
 v_cvt_flr_i32_f32_e64 v5, |v1|
 // CHECK: [0x05,0x01,0x4d,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_flr_i32_f32_e64 v5, vcc_hi mul:2
+// CHECK: [0x05,0x00,0x4d,0xd1,0x6b,0x00,0x00,0x08]
+
 v_cvt_off_f32_i4_e64 v5, v1
 // CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x01,0x00,0x00]
 
@@ -1512,6 +1536,12 @@
 v_cvt_u32_f64_e64 v5, v[1:2] clamp
 // CHECK: [0x05,0x80,0x55,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_u32_f64_e64 v5, s[4:5] mul:2
+// CHECK: [0x05,0x00,0x55,0xd1,0x04,0x00,0x00,0x08]
+
+v_cvt_u32_f64_e64 v5, v[1:2] clamp div:2
+// CHECK: [0x05,0x80,0x55,0xd1,0x01,0x01,0x00,0x18]
+
 v_cvt_f64_u32_e64 v[5:6], v1
 // CHECK: [0x05,0x00,0x56,0xd1,0x01,0x01,0x00,0x00]
 
@@ -3495,6 +3525,9 @@
 v_frexp_exp_i32_f64_e64 v5, |v[1:2]|
 // CHECK: [0x05,0x01,0x70,0xd1,0x01,0x01,0x00,0x00]
 
+v_frexp_exp_i32_f64_e64 v5, s[4:5] mul:2
+// CHECK: [0x05,0x00,0x70,0xd1,0x04,0x00,0x00,0x08]
+
 v_frexp_mant_f64_e64 v[5:6], v[1:2]
 // CHECK: [0x05,0x00,0x71,0xd1,0x01,0x01,0x00,0x00]
 
@@ -4041,6 +4074,12 @@
 v_cvt_u16_f16_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x7b,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_u16_f16_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x7b,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_u16_f16_e64 v5, v1 clamp div:2
+// CHECK: [0x05,0x80,0x7b,0xd1,0x01,0x01,0x00,0x18]
+
 v_cvt_i16_f16_e64 v5, v1
 // CHECK: [0x05,0x00,0x7c,0xd1,0x01,0x01,0x00,0x00]
 
@@ -4113,6 +4152,12 @@
 v_cvt_i16_f16_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x7c,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_i16_f16_e64 v5, v1 mul:2
+// CHECK: [0x05,0x00,0x7c,0xd1,0x01,0x01,0x00,0x08]
+
+v_cvt_i16_f16_e64 v5, v1 clamp div:2
+// CHECK: [0x05,0x80,0x7c,0xd1,0x01,0x01,0x00,0x18]
+
 v_rcp_f16_e64 v5, v1
 // CHECK: [0x05,0x00,0x7d,0xd1,0x01,0x01,0x00,0x00]
 
@@ -4614,6 +4659,9 @@
 v_frexp_exp_i16_f16_e64 v5, |v1|
 // CHECK: [0x05,0x01,0x83,0xd1,0x01,0x01,0x00,0x00]
 
+v_frexp_exp_i16_f16_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x83,0xd1,0x01,0x00,0x00,0x08]
+
 v_floor_f16_e64 v5, v1
 // CHECK: [0x05,0x00,0x84,0xd1,0x01,0x01,0x00,0x00]
 
@@ -5352,6 +5400,9 @@
 v_cvt_norm_i16_f16_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x8d,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_norm_i16_f16_e64 v5, v1 mul:2
+// CHECK: [0x05,0x00,0x8d,0xd1,0x01,0x01,0x00,0x08]
+
 v_cvt_norm_u16_f16_e64 v5, v1
 // CHECK: [0x05,0x00,0x8e,0xd1,0x01,0x01,0x00,0x00]
 
@@ -5424,6 +5475,9 @@
 v_cvt_norm_u16_f16_e64 v5, v1 clamp
 // CHECK: [0x05,0x80,0x8e,0xd1,0x01,0x01,0x00,0x00]
 
+v_cvt_norm_u16_f16_e64 v5, v1 mul:2
+// CHECK: [0x05,0x00,0x8e,0xd1,0x01,0x01,0x00,0x08]
+
 v_sat_pk_u8_i16_e64 v5, v1
 // CHECK: [0x05,0x00,0x8f,0xd1,0x01,0x01,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s
--- a/llvm/test/MC/AMDGPU/vop3-errs.s
+++ b/llvm/test/MC/AMDGPU/vop3-errs.s
@@ -43,9 +43,6 @@
 v_cmp_le_f64_e64 vcc, v0, v1 mul:4
 // GCN: error: invalid operand for instruction
 
-v_cvt_u32_f32_e64 v0, v1 div:2
-// GCN: error: invalid operand for instruction
-
 //
 // mul
 //