Index: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td @@ -193,13 +193,14 @@ defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; -defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; let SchedRW = [WriteQuarterRate32] in { +defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>; defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; +defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; } // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -207,8 +208,6 @@ defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; } // End SchedRW = [WriteDouble]; -defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; - let SchedRW = [WriteDouble] in { defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>; } // End SchedRW = [WriteDouble] @@ -223,9 +222,9 @@ defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; -defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; let SchedRW = [WriteDoubleAdd] in { +defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>; defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>; } // End SchedRW = [WriteDoubleAdd] @@ -289,9 +288,7 @@ defm V_MOVRELSD_B32 : VOP1Inst 
<"v_movrelsd_b32", VOP_NO_EXT>; } // End Uses = [M0, EXEC] -let SchedRW = [WriteQuarterRate32] in { defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; -} // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -335,11 +332,15 @@ defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; +let SchedRW = [WriteQuarterRate32] in { defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>; defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>; defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; +defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; +defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; +} // End SchedRW = [WriteQuarterRate32] defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>; defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; @@ -347,8 +348,6 @@ defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>; defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>; defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; -defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; -defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -283,10 +283,10 @@ def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", 
VOP3_Profile>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; -def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; let SchedRW = [WriteDoubleAdd] in { +def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile, fadd, 1>; def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile, fmul, 1>; def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile, fminnum, 1>; @@ -374,6 +374,7 @@ let SchedRW = [WriteDouble]; } +let SchedRW = [Write64Bit] in { // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile>; @@ -387,17 +388,17 @@ def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; } // End SubtargetPredicate = isVI - +} // End SchedRW = [Write64Bit] let SubtargetPredicate = isCIVI in { -let Constraints = "@earlyclobber $vdst" in { +let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile>; -} // End Constraints = "@earlyclobber $vdst" +} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] let isCommutable = 1 in { -let SchedRW = [WriteDouble, WriteSALU] in { +let SchedRW = [WriteQuarterRate32, WriteSALU] in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; -} // End SchedRW = [WriteDouble, WriteSALU] +} // End SchedRW = [WriteQuarterRate32, WriteSALU] Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll @@ -24,13 +24,13 @@ ; GCN-LABEL: {{^}}exp2_v2f16 ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; SI: 
v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI: v_exp_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] +; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_exp_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]] +; SI: v_exp_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] +; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI-NOT: and ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll @@ -24,13 +24,13 @@ ; GCN-LABEL: {{^}}sqrt_v2f16 ; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI: v_sqrt_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] -; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] +; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_sqrt_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]] +; SI: v_sqrt_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] +; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI-NOT: v_and_b32 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] Index: llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll +++ 
llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll @@ -98,10 +98,10 @@ ; GCN-LABEL: {{^}}mad_i64_i32_sextops_i31_i63: ; CI: v_lshl_b64 +; CI: v_bfe_i32 v[[B1:[0-9]+]], v1, 0, 31 ; CI: v_ashr_i64 -; CI: v_bfe_i32 v1, v1, 0, 31 -; CI: v_bfe_i32 v0, v0, 0, 31 -; CI: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] +; CI: v_bfe_i32 v[[B2:[0-9]+]], v0, 0, 31 +; CI: v_mad_i64_i32 v[0:1], s[6:7], v[[B2]], v[[B1]], v[1:2] define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 { %sext0 = sext i31 %arg0 to i63 %sext1 = sext i31 %arg1 to i63 Index: llvm/trunk/test/CodeGen/AMDGPU/mul.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mul.ll +++ llvm/trunk/test/CodeGen/AMDGPU/mul.ll @@ -221,11 +221,11 @@ ; SI-DAG: v_mul_hi_u32 ; VI: s_mul_i32 -; VI: v_mad_u64_u32 +; VI: v_mul_hi_u32 ; VI: s_mul_i32 ; VI: v_mul_hi_u32 ; VI: v_mad_u64_u32 -; VI: v_mul_hi_u32 +; VI: v_mad_u64_u32 ; VI: v_mad_u64_u32