Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -247,6 +247,15 @@
   bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg);
   bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg);
 
+  /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y)
+  /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y)
+  /// \p PtrRegAndCommute receives the pointer x and a flag that is true when
+  /// the G_PTRTOINT was the second source operand of the G_ADD.
+  bool matchCombineAddP2IToPtrAdd(MachineInstr &MI,
+                                  std::pair<Register, bool> &PtrRegAndCommute);
+  bool applyCombineAddP2IToPtrAdd(MachineInstr &MI,
+                                  std::pair<Register, bool> &PtrRegAndCommute);
+
   /// Return true if any explicit use operand on \p MI is defined by a
   /// G_IMPLICIT_DEF.
   bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -284,6 +284,16 @@
   (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }])
 >;
 
+// Fold add ptrtoint(x), y -> ptrtoint (ptr_add x, y)
+def add_p2i_to_ptradd_matchinfo : GIDefMatchData<"std::pair<Register, bool>">;
+def add_p2i_to_ptradd : GICombineRule<
+  (defs root:$root, add_p2i_to_ptradd_matchinfo:$info),
+  (match (wip_match_opcode G_ADD):$root,
+         [{ return Helper.matchCombineAddP2IToPtrAdd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyCombineAddP2IToPtrAdd(*${root}, ${info}); }])
+>;
+
+
 // Simplify: (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
 def hoist_logic_op_with_same_opcode_hands: GICombineRule <
   (defs root:$root, instruction_steps_matchdata:$info),
@@ -307,7 +317,7 @@
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd]>;
 def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     combines_for_extload, combine_indexed_load_store, undef_combines,
     identity_combines, simplify_add_to_sub,
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1639,6 +1639,58 @@
   return true;
 }
 
+/// Match a G_ADD with a source defined by a G_PTRTOINT whose pointer type has
+/// the same scalar size in bits as the integer type. On success PtrReg.first
+/// is the pointer register and PtrReg.second is true if it was the RHS.
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  LLT IntTy = MRI.getType(LHS);
+
+  // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+  // instruction.
+  PtrReg.second = false;
+  for (Register SrcReg : {LHS, RHS}) {
+    if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+      // Don't handle cases where the integer is implicitly converted to the
+      // pointer width.
+      LLT PtrTy = MRI.getType(PtrReg.first);
+      if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+        return true;
+    }
+
+    PtrReg.second = true;
+  }
+
+  return false;
+}
+
+/// Rewrite the matched G_ADD as G_PTRTOINT (G_PTR_ADD ptr, other-operand),
+/// defining the original destination register, and erase \p MI.
+bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // When the pointer came from the RHS, move the integer operand into RHS;
+  // LHS is overwritten with the pointer register either way.
+  const bool DoCommute = PtrReg.second;
+  if (DoCommute)
+    std::swap(LHS, RHS);
+  LHS = PtrReg.first;
+
+  LLT PtrTy = MRI.getType(LHS);
+
+  Builder.setInstrAndDebugLoc(MI);
+  auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
+  Builder.buildPtrToInt(Dst, PtrAdd);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
     return MO.isReg() &&
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-to-ptradd.mir
@@ -0,0 +1,180 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: add_ptrtoint_p1_to_s64_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: add_ptrtoint_p1_to_s64_lhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1)
+    ; GCN: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_PTRTOINT %0
+    %3:_(s64) = G_ADD %2, %1
+    $vgpr0_vgpr1 = COPY %3
+
+...
+# ptrtoint p1 -> s32 with the pointer taken from a 64-bit VGPR pair: the checks expect the G_ADD to be left alone.
+---
+name: add_ptrtoint_p1_to_s32_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: add_ptrtoint_p1_to_s32_lhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p1)
+    ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PTRTOINT]], [[COPY1]]
+    ; GCN: $vgpr0 = COPY [[ADD]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_PTRTOINT %0
+    %3:_(s32) = G_ADD %2, %1
+    $vgpr0 = COPY %3
+
+...
+# ptrtoint p3 -> s32, pointer on the LHS of the add: the checks expect G_PTR_ADD followed by G_PTRTOINT.
+---
+name: add_ptrtoint_p3_to_s32_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: add_ptrtoint_p3_to_s32_lhs
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[COPY1]](s32)
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PTR_ADD]](p3)
+    ; GCN: $vgpr0 = COPY [[PTRTOINT]](s32)
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_PTRTOINT %0
+    %3:_(s32) = G_ADD %2, %1
+    $vgpr0 = COPY %3
+
+...
+# G_INTTOPTR of the add result: the checks expect only a G_PTR_ADD, with no ptrtoint/inttoptr left.
+---
+name: inttoptr_add_ptrtoint_p1_to_s64_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: inttoptr_add_ptrtoint_p1_to_s64_lhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; GCN: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_PTRTOINT %0
+    %3:_(s64) = G_ADD %2, %1
+    %4:_(p1) = G_INTTOPTR %3
+    $vgpr0_vgpr1 = COPY %4
+
+...
+# Vector form, <2 x p3> -> <2 x s32>: the checks expect a vector G_PTR_ADD followed by G_PTRTOINT.
+---
+name: add_ptrtoint_v2p3_to_v2s32_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: add_ptrtoint_v2p3_to_v2s32_lhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[COPY]], [[COPY1]](<2 x s32>)
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(<2 x s32>) = G_PTRTOINT [[PTR_ADD]](<2 x p3>)
+    ; GCN: $vgpr0_vgpr1 = COPY [[PTRTOINT]](<2 x s32>)
+    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_PTRTOINT %0
+    %3:_(<2 x s32>) = G_ADD %2, %1
+    $vgpr0_vgpr1 = COPY %3
+
+...
+# Vector form, <2 x p1> (from four VGPRs) -> <2 x s32>: the checks expect the G_ADD to be left alone.
+---
+name: add_ptrtoint_v2p1_to_v2s32_lhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
+
+    ; GCN-LABEL: name: add_ptrtoint_v2p1_to_v2s32_lhs
+    ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GCN: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GCN: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(<2 x s32>) = G_PTRTOINT [[COPY]](<2 x p1>)
+    ; GCN: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[PTRTOINT]], [[COPY1]]
+    ; GCN: $vgpr0_vgpr1 = COPY [[ADD]](<2 x s32>)
+    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    %2:_(<2 x s32>) = G_PTRTOINT %0
+    %3:_(<2 x s32>) = G_ADD %2, %1
+    $vgpr0_vgpr1 = COPY %3
+
+...
+# The G_PTRTOINT is the RHS of the add: the checks expect the pointer to become the G_PTR_ADD base.
+---
+name: add_ptrtoint_p1_to_s64_rhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: add_ptrtoint_p1_to_s64_rhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1)
+    ; GCN: $vgpr0_vgpr1 = COPY [[PTRTOINT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_PTRTOINT %0
+    %3:_(s64) = G_ADD %1, %2
+    $vgpr0_vgpr1 = COPY %3
+
+...
+# Both add operands are G_PTRTOINT: the checks expect the LHS pointer as base and the RHS ptrtoint kept as offset.
+---
+name: add_ptrtoint_p1_to_s64_lhs_rhs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: add_ptrtoint_p1_to_s64_lhs_rhs
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; GCN: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p1)
+    ; GCN: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[PTRTOINT]](s64)
+    ; GCN: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p1)
+    ; GCN: $vgpr0_vgpr1 = COPY [[PTRTOINT1]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p1) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_PTRTOINT %0
+    %3:_(s64) = G_PTRTOINT %1
+    %4:_(s64) = G_ADD %2, %3
+    $vgpr0_vgpr1 = COPY %4
+
+...