Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -480,6 +480,8 @@
 
 let Defs = [M0, EXEC, SCC],
   UseNamedOperandTable = 1 in {
 
+// SI_INDIRECT_SRC/DST are only used by legacy SelectionDAG indirect
+// addressing implementation.
 class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
   (outs VGPR_32:$vdst), (ins rc:$src, VS_32:$idx, i32imm:$offset)> {
@@ -493,7 +495,6 @@
   let usesCustomInserter = 1;
 }
 
-// TODO: We can support indirect SGPR access.
 def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
 def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
 def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
@@ -508,6 +509,65 @@
 
 } // End Uses = [EXEC], Defs = [M0, EXEC]
 
+
+// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
+// expecting to be executed with gpr indexing mode enabled)
+// instruction in which the vector operand appears only twice, once as
+// def and once as use. Using this pseudo avoids problems with the Two
+// Address instructions pass.
+class INDIRECT_REG_WRITE_pseudo<RegisterClass rc, RegisterOperand val_ty> : PseudoInstSI <
+  (outs rc:$vdst), (ins rc:$vsrc, val_ty:$val, i32imm:$subreg)> {
+  let Constraints = "$vsrc = $vdst";
+  let Uses = [M0];
+}
+
+class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
+  INDIRECT_REG_WRITE_pseudo<rc, VSrc_b32> {
+  let VALU = 1;
+  let VOP1 = 1;
+  let Uses = [M0, EXEC];
+}
+
+class S_INDIRECT_REG_WRITE_pseudo<RegisterClass rc, RegisterOperand val_ty> :
+  INDIRECT_REG_WRITE_pseudo<rc, val_ty> {
+  let SALU = 1;
+  let SOP1 = 1;
+  let Uses = [M0];
+}
+
+class S_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
+  S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b32>;
+class S_INDIRECT_REG_WRITE_B64_pseudo<RegisterClass rc> :
+  S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b64>;
+
+
+def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
+
+def S_INDIRECT_REG_WRITE_B32_V1 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_32>;
+def S_INDIRECT_REG_WRITE_B32_V2 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_B32_V3 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_96>;
+def S_INDIRECT_REG_WRITE_B32_V4 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_B32_V5 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_160>;
+def S_INDIRECT_REG_WRITE_B32_V8 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_B32_V16 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_B32_V32 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_1024>;
+
+def S_INDIRECT_REG_WRITE_B64_V1 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_B64_V2 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_B64_V4 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_B64_V8 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_B64_V16 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_1024>;
+
+
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
   let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
     def _SAVE : PseudoInstSI <
Index: llvm/lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -97,6 +97,17 @@
   let has_sdst = 0;
 }
 
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_32_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins SReg_32:$sdst, SSrc_b32:$src0),
+  "$sdst, $src0", pattern>;
+
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_64_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins SReg_64:$sdst, SSrc_b64:$src0),
+  "$sdst, $src0", pattern
+>;
+
 class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
   opName, (outs), (ins SReg_32:$src0),
   "$src0", pattern> {
@@ -267,8 +278,8 @@
 let Uses = [M0] in {
 def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
 def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">;
-def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
-def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
+def S_MOVRELD_B32 : SOP1_32_movreld <"s_movreld_b32">;
+def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">;
 } // End Uses = [M0]
 
 let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -812,29 +812,6 @@
   let SubtargetPredicate = isGFX8GFX9;
 }
 
-// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
-// expecting to be executed with gpr indexing mode enabled)
-// instruction in which the vector operand appears only twice, once as
-// def and once as use. Using this pseudo avoids problems with the Two
-// Address instructions pass.
-class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
-  (outs rc:$vdst),
-  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$subreg)> {
-  let VOP1 = 1;
-
-  let Constraints = "$vsrc = $vdst";
-  let Uses = [M0, EXEC];
-}
-
-def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
-def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
-def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
-def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
-def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
-def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
-def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
-def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
-
 let OtherPredicates = [isGFX8Plus] in {
 
 def : GCNPat <
Index: llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -422,12 +422,12 @@
 
   bb.2:
     $m0 = S_MOV_B32 0
-    $sgpr0 = S_MOVRELD_B32 $sgpr0, implicit $m0
+    S_MOVRELD_B32 $sgpr0, $sgpr0, implicit $m0
    S_BRANCH %bb.3
 
   bb.3:
     $m0 = S_MOV_B32 0
-    $sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0
+    S_MOVRELD_B64 $sgpr0_sgpr1, $sgpr0_sgpr1, implicit $m0
     S_ENDPGM 0
 ...