Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/AMDGPU/SIInstructions.td
Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | |||||
// Pseudo matched from atomic_fence with two i32 immediate operands ($ordering
// and $scope). It has no outputs and is flagged hasSideEffects and maybeAtomic.
def ATOMIC_FENCE : SPseudoInstSI< | def ATOMIC_FENCE : SPseudoInstSI< | ||||
(outs), (ins i32imm:$ordering, i32imm:$scope), | (outs), (ins i32imm:$ordering, i32imm:$scope), | ||||
[(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))], | [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))], | ||||
"ATOMIC_FENCE $ordering, $scope"> { | "ATOMIC_FENCE $ordering, $scope"> { | ||||
let hasSideEffects = 1; | let hasSideEffects = 1; | ||||
let maybeAtomic = 1; | let maybeAtomic = 1; | ||||
} | } | ||||
// VOP profile used by V_MOV_B64_DPP_PSEUDO. The type list is
// [i64, i64, untyped, untyped], and both HasExt and HasExtDPP are enabled.
// NOTE(review): presumably the list is the result type followed by the source
// operand types -- confirm against the VOPProfile definition.
def VOP_I64_I64_DPP : VOPProfile <[i64, i64, untyped, untyped]> { | |||||
let HasExt = 1; | |||||
let HasExtDPP = 1; | |||||
} | |||||
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { | let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { | ||||
// For use in patterns | // For use in patterns | ||||
// Defines a VReg_64 result from two VSrc_b64 sources and one SSrc_b64 source.
// It has an empty asm string and no selection pattern of its own, is marked
// isPseudo and isCodeGenOnly, and sets usesCustomInserter.
def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), | def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), | ||||
(ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> { | (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> { | ||||
let isPseudo = 1; | let isPseudo = 1; | ||||
let isCodeGenOnly = 1; | let isCodeGenOnly = 1; | ||||
let usesCustomInserter = 1; | let usesCustomInserter = 1; | ||||
} | } | ||||
// 64-bit vector move instruction. This is mainly used by the | // 64-bit vector move instruction. This is mainly used by the | ||||
// SIFoldOperands pass to enable folding of inline immediates. | // SIFoldOperands pass to enable folding of inline immediates. | ||||
// Takes a single VSrc_b64 source ($src0) and defines a VReg_64 result ($vdst).
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), | def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), | ||||
(ins VSrc_b64:$src0)>; | (ins VSrc_b64:$src0)>; | ||||
// 64-bit vector move with dpp. Expanded post-RA. | |||||
// Declared as a VOP_DPP_Pseudo with the VOP_I64_I64_DPP profile. It is the
// selection target of the i64 int_amdgcn_mov_dpp / int_amdgcn_update_dpp
// patterns in this file.
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64_DPP> { | |||||
let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete. | |||||
} | |||||
// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the | // Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the | ||||
// WQM pass processes it. | // WQM pass processes it. | ||||
// Both the result ($vdst) and the source ($src0) use the `unknown` operand class.
def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; | def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; | ||||
// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is | // Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is | ||||
// turned into a copy by WQM pass, but does not seed WQM requirements. | // turned into a copy by WQM pass, but does not seed WQM requirements. | ||||
// Declared with the same operand list as WQM: unknown:$vdst out, unknown:$src0 in.
def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; | def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; | ||||
▲ Show 20 Lines • Show All 1,732 Lines • ▼ Show 20 Lines | def : GCNPat < | ||||
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) | (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) | ||||
>; | >; | ||||
// Select a v4f16 scalar_to_vector of an f16 value by inserting $src0 into the
// sub0 subregister of an IMPLICIT_DEF.
def : GCNPat < | def : GCNPat < | ||||
(v4f16 (scalar_to_vector f16:$src0)), | (v4f16 (scalar_to_vector f16:$src0)), | ||||
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) | (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) | ||||
>; | >; | ||||
// Select the i64 form of int_amdgcn_mov_dpp to V_MOV_B64_DPP_PSEUDO.
// $src is passed as both of the first two operands. The dpp_ctrl, row_mask and
// bank_mask immediates are converted with as_i32imm, and bound_ctrl with
// as_i1imm.
def : GCNPat < | |||||
(i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, | |||||
timm:$bound_ctrl)), | |||||
(V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl), | |||||
(as_i32imm $row_mask), (as_i32imm $bank_mask), | |||||
(as_i1imm $bound_ctrl)) | |||||
>; | |||||
arsenm: Why not do the split here? Why treat it as a post-RA pseudo? At latest I would have expected this to be expanded in FinalizeISel | |||||
Not Done | Reply | Inline Actions | arsenm: I still don't see the point of the pseudo. You can just emit the reg_sequence directly here? | |||||
// Select the i64 form of int_amdgcn_update_dpp to V_MOV_B64_DPP_PSEUDO.
// $old is passed as the first operand and $src as the second. The dpp_ctrl,
// row_mask and bank_mask immediates are converted with as_i32imm, and
// bound_ctrl with as_i1imm.
def : GCNPat < | |||||
(i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask, | |||||
timm:$bank_mask, timm:$bound_ctrl)), | |||||
(V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl), | |||||
(as_i32imm $row_mask), (as_i32imm $bank_mask), | |||||
(as_i1imm $bound_ctrl)) | |||||
>; | |||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
// Fract Patterns | // Fract Patterns | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
let SubtargetPredicate = isGFX6 in { | let SubtargetPredicate = isGFX6 in { | ||||
// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is | // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is | ||||
// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient | // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient | ||||
▲ Show 20 Lines • Show All 129 Lines • Show Last 20 Lines |
Why not do the split here? Why treat it as a post-RA pseudo? At latest I would have expected this to be expanded in FinalizeISel