diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -411,8 +411,9 @@ // Generic pointer offset. def G_PTR_ADD : GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, type1:$src2); + // FIXME: Not sure why it's not ptr in the first place? + let OutOperandList = (outs ptype0:$dst); + let InOperandList = (ins ptype0:$src1, type1:$src2); let hasSideEffects = false; } diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -55,7 +55,6 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; -def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -157,6 +156,9 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. For example, Predicates such // as isSignExtLoad require that this is not a perfect 1:1 mapping since a diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -109,6 +109,9 @@ def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'. def SDTUnaryOp : SDTypeProfile<1, 1, []>; // for bitconvert. +def SDTPtrAddOp : SDTypeProfile<1, 2, [ // ptradd + SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1> +]>; def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0> ]>; @@ -373,6 +376,7 @@ def add : SDNode<"ISD::ADD" , SDTIntBinOp , [SDNPCommutative, SDNPAssociative]>; +def ptradd : SDNode<"ISD::ADD" , SDTPtrAddOp, []>; def sub : SDNode<"ISD::SUB" , SDTIntBinOp>; def mul : SDNode<"ISD::MUL" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3485,6 +3485,8 @@ case TargetOpcode::G_BUILD_VECTOR_TRUNC: return selectG_BUILD_VECTOR_TRUNC(I); case TargetOpcode::G_PTR_ADD: + if (selectImpl(I, *CoverageInfo)) + return true; return selectG_PTR_ADD(I); case TargetOpcode::G_IMPLICIT_DEF: return selectG_IMPLICIT_DEF(I); diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -449,11 +449,18 @@ // // FIXME: With unlucky SGPR operands, we could penalize code by // blocking folding SGPR->VGPR copies later. - // FIXME: There's no register bank verifier + // FIXME: There's no register bank verifier let GISelPredicateCode = [{ const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64); + int ConstantBusUses = 0; for (unsigned i = 0; i < 3; ++i) { + + // FIX: Somehow my changes made this predicate run before the + // Feature check, so it crashes without this. + if(!Operands[i] || !Operands[i]->isReg()) + return true; + const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI); if (RegBank->getID() == AMDGPU::SGPRRegBankID) { if (++ConstantBusUses > ConstantBusLimit) @@ -587,15 +594,17 @@ def : Cvt_SR_F8_F32_Pat; } -class ThreeOp_i32_Pats : GCNPat < - // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. - (ThreeOpFrag i32:$src0, i32:$src1, i32:$src2), - (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) ->; +class ThreeOp_i32_Pats types = [i32, i32, i32]> + : GCNPat <(ThreeOpFrag types[0]:$src0, types[1]:$src1, types[2]:$src2), + (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)>; def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; + def : ThreeOp_i32_Pats; +def : ThreeOp_i32_Pats; +def : ThreeOp_i32_Pats; + def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -137,9 +137,8 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -174,9 +173,8 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -33,8 +33,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -63,14 +62,13 @@ ; GFX11-GISEL-LABEL: soff1_voff1: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -117,8 +115,7 @@ ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -149,15 +146,14 @@ ; GFX11-GISEL-LABEL: soff1_voff2: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -204,8 +200,7 @@ ; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -235,15 +230,14 @@ ; GFX11-GISEL-LABEL: soff1_voff4: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -291,8 +285,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -323,15 +316,15 @@ ; GFX11-GISEL-LABEL: soff2_voff1: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -380,8 +373,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -413,16 +405,15 @@ ; GFX11-GISEL-LABEL: soff2_voff2: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -471,8 +462,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -504,16 +494,15 @@ ; GFX11-GISEL-LABEL: soff2_voff4: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -561,8 +550,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -594,15 +582,15 @@ ; GFX11-GISEL-LABEL: soff4_voff1: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -651,8 +639,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -686,16 +673,15 @@ ; GFX11-GISEL-LABEL: soff4_voff2: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -744,8 +730,7 @@ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 +; GFX940-GISEL-NEXT: v_add3_u32 v0, v1, s0, v0 ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -777,16 +762,15 @@ ; GFX11-GISEL-LABEL: soff4_voff4: ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 -; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_add3_u32 v0, 4, s0, v0 +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc +; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -4007,8 +4007,11 @@ for (const TypeSetByHwMode &VTy : Src->getExtTypes()) { // Results don't have a name unless they are the root node. The caller will // set the name if appropriate. + // FIXME: Hack, shouldn't be required but somehow it needs to be else a s32 + // check is emitted for the Dst + bool OperandIsAPointer = SrcGIOrNull->TheDef->getName() == "G_PTR_ADD"; OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); - if (auto Error = OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */)) + if (auto Error = OM.addTypeCheckPredicate(VTy, OperandIsAPointer)) return failedImport(toString(std::move(Error)) + " for result of Src pattern operator"); }