diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -40,19 +40,20 @@ VINTRP = 1 << 13, SDWA = 1 << 14, DPP = 1 << 15, + TRANS = 1 << 16, // Memory instruction formats. - MUBUF = 1 << 16, - MTBUF = 1 << 17, - SMRD = 1 << 18, - MIMG = 1 << 19, - EXP = 1 << 20, - FLAT = 1 << 21, - DS = 1 << 22, + MUBUF = 1 << 17, + MTBUF = 1 << 18, + SMRD = 1 << 19, + MIMG = 1 << 20, + EXP = 1 << 21, + FLAT = 1 << 22, + DS = 1 << 23, // Pseudo instruction formats. - VGPRSpill = 1 << 23, - SGPRSpill = 1 << 24, + VGPRSpill = 1 << 24, + SGPRSpill = 1 << 25, // High bits - other information. VM_CNT = UINT64_C(1) << 32, diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -33,6 +33,7 @@ field bit VINTRP = 0; field bit SDWA = 0; field bit DPP = 0; + field bit TRANS = 0; // Memory instruction formats. field bit MUBUF = 0; @@ -153,17 +154,18 @@ let TSFlags{13} = VINTRP; let TSFlags{14} = SDWA; let TSFlags{15} = DPP; - - let TSFlags{16} = MUBUF; - let TSFlags{17} = MTBUF; - let TSFlags{18} = SMRD; - let TSFlags{19} = MIMG; - let TSFlags{20} = EXP; - let TSFlags{21} = FLAT; - let TSFlags{22} = DS; - - let TSFlags{23} = VGPRSpill; - let TSFlags{24} = SGPRSpill; + let TSFlags{16} = TRANS; + + let TSFlags{17} = MUBUF; + let TSFlags{18} = MTBUF; + let TSFlags{19} = SMRD; + let TSFlags{20} = MIMG; + let TSFlags{21} = EXP; + let TSFlags{22} = FLAT; + let TSFlags{23} = DS; + + let TSFlags{24} = VGPRSpill; + let TSFlags{25} = SGPRSpill; let TSFlags{32} = VM_CNT; let TSFlags{33} = EXP_CNT; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -582,6 +582,14 @@ return get(Opcode).TSFlags & SIInstrFlags::DPP; } + static bool isTRANS(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::TRANS; + } + + bool isTRANS(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::TRANS; + } + static bool isVOP3P(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; } diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -242,25 +242,25 @@ defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; -let SchedRW = [WriteTrans32] in { +let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>; defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>; -} // End SchedRW = [WriteTrans32] +} // End TRANS = 1, SchedRW = [WriteTrans32] -let SchedRW = [WriteTrans64] in { +let TRANS = 1, SchedRW = [WriteTrans64] in { defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>; -} // End SchedRW = [WriteTrans64] +} // End TRANS = 1, SchedRW = [WriteTrans64] -let SchedRW = [WriteTrans32] in { +let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>; defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; -} // End SchedRW = [WriteTrans32] +} // End TRANS = 1, SchedRW = [WriteTrans32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; @@ -339,7 +339,7 @@ } // End Uses = [M0, EXEC] let SubtargetPredicate = isGFX6GFX7 in { - let SchedRW = [WriteTrans32] in { + let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_LOG_CLAMP_F32 : VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; defm V_RCP_CLAMP_F32 : @@ -350,7 +350,7 @@ VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; defm V_RSQ_LEGACY_F32 : VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>; - } // End SchedRW = [WriteTrans32] + } // End TRANS = 1, SchedRW = [WriteTrans32] let SchedRW = [WriteDouble] in { defm V_RCP_CLAMP_F64 : @@ -361,10 +361,10 @@ } // End SubtargetPredicate = isGFX6GFX7 let SubtargetPredicate = isGFX7GFX8GFX9 in { - let SchedRW = [WriteTrans32] in { + let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>; defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>; - } // End SchedRW = [WriteTrans32] + } // End TRANS = 1, SchedRW = [WriteTrans32] } // End SubtargetPredicate = isGFX7GFX8GFX9 let SubtargetPredicate = isGFX7Plus in { @@ -384,7 +384,7 @@ } // End FPDPRounding = 1 defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; -let SchedRW = [WriteTrans32] in { +let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; @@ -392,7 +392,7 @@ defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; -} // End SchedRW = [WriteTrans32] +} // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>; defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -8,6 +8,7 @@ // dummies for outer let class LetDummies { + bit TRANS; bit ReadsModeReg; bit mayRaiseFPException; bit isCommutable;