diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -506,8 +506,14 @@
       SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
 
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// By default, it erases the instruction \p MI from the function.
   bool applyBuildFn(MachineInstr &MI,
                     std::function<void(MachineIRBuilder &)> &MatchInfo);
+  /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// This variant does not erase \p MI after calling the build function.
+  bool applyBuildFnNoErase(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
   bool matchRotateOutOfRange(MachineInstr &MI);
@@ -521,6 +527,9 @@
   bool matchBitfieldExtractFromAnd(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  bool matchReassocPtrAdd(MachineInstr &MI,
+                          std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
@@ -573,6 +582,11 @@
       SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
       const SmallVector<Register, 8> &RegsToVisit,
       const unsigned MemSizeInBits);
+
+  /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing
+  /// a re-association of its operands would break an existing legal addressing
+  /// mode that the address computation currently represents.
+  bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd);
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -628,6 +628,14 @@
 def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
 
+def reassoc_ptradd : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_PTR_ADD):$root,
+    [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd]>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -659,9 +667,10 @@
                                        mul_by_neg_one]>;
 
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-    extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload,
+    extract_vec_elt_combines, combines_for_extload,
     combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+    reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1670,6 +1670,13 @@
   if (!MaybeImmVal)
     return false;
 
+  // Don't do this combine if there are multiple uses of the first PTR_ADD,
+  // since we may be able to compute the second PTR_ADD as an immediate
+  // offset anyway. Folding the first offset into the second may cause us
+  // to go beyond the bounds of our legal addressing modes.
+  if (!MRI.hasOneNonDBGUse(Add2))
+    return false;
+
   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
     return false;
@@ -3878,6 +3885,13 @@
   return true;
 }
 
+bool CombinerHelper::applyBuildFnNoErase(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  MatchInfo(Builder);
+  return true;
+}
+
 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
@@ -4021,6 +4035,133 @@
   return true;
 }
 
+bool CombinerHelper::reassociationCanBreakAddressingModePattern(
+    MachineInstr &PtrAdd) {
+  if (PtrAdd.getOpcode() != TargetOpcode::G_PTR_ADD)
+    return false;
+
+  Register Src1Reg = PtrAdd.getOperand(1).getReg();
+  MachineInstr *Src1Def = MRI.getVRegDef(Src1Reg);
+  if (Src1Def->getOpcode() != TargetOpcode::G_PTR_ADD)
+    return false;
+
+  Register Src2Reg = PtrAdd.getOperand(2).getReg();
+
+  if (MRI.hasOneNonDBGUse(Src1Reg))
+    return false;
+
+  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+  auto C2 = getConstantVRegVal(Src2Reg, MRI);
+  if (!C1 || !C2)
+    return false;
+
+  const APInt &C1APIntVal = *C1;
+  const APInt &C2APIntVal = *C2;
+  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+    return false;
+
+  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+  if (CombinedValueIntVal.getBitWidth() > 64)
+    return false;
+  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
+    // This combine may end up running before ptrtoint/inttoptr combines
+    // manage to eliminate redundant conversions, so try to look through them.
+    MachineInstr *ConvUseMI = &UseMI;
+    while (ConvUseMI->getOpcode() == TargetOpcode::G_INTTOPTR ||
+           ConvUseMI->getOpcode() == TargetOpcode::G_PTRTOINT) {
+      Register DefReg = ConvUseMI->getOperand(0).getReg();
+      if (!MRI.hasOneNonDBGUse(DefReg))
+        break;
+      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
+    }
+    auto LoadStore = ConvUseMI->getOpcode() == TargetOpcode::G_LOAD ||
+                     ConvUseMI->getOpcode() == TargetOpcode::G_STORE;
+    if (LoadStore) {
+      // Is x[offset2] already not a legal addressing mode? If so then
+      // reassociating the constants breaks nothing (we test offset2 because
+      // that's the one we hope to fold into the load or store).
+      TargetLoweringBase::AddrMode AM;
+      AM.HasBaseReg = true;
+      AM.BaseOffs = C2APIntVal.getSExtValue();
+      unsigned AS =
+          MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
+      Type *AccessTy =
+          getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
+                        PtrAdd.getMF()->getFunction().getContext());
+      const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
+      if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                     AccessTy, AS))
+        continue;
+
+      // Would x[offset1+offset2] still be a legal addressing mode?
+      AM.BaseOffs = CombinedValue;
+      if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
+                                     AccessTy, AS))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocPtrAdd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+  // We're trying to match a few pointer computation patterns here for
+  // re-association opportunities.
+  // 1) Isolating a constant operand to be on the RHS, e.g.:
+  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+  //
+  // 2) Folding two constants in each sub-tree as long as such folding
+  // doesn't break a legal addressing mode.
+  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+  Register Src1Reg = MI.getOperand(1).getReg();
+  Register Src2Reg = MI.getOperand(2).getReg();
+  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
+  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
+
+  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
+    // Try to match example 1).
+    if (RHS->getOpcode() != TargetOpcode::G_ADD)
+      return false;
+    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+      auto NewBase =
+          Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(NewBase.getReg(0));
+      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+      Observer.changedInstr(MI);
+    };
+  } else {
+    // Try to match example 2.
+    Register LHSSrc1 = LHS->getOperand(1).getReg();
+    Register LHSSrc2 = LHS->getOperand(2).getReg();
+    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+    if (!C1)
+      return false;
+    auto C2 = getConstantVRegVal(Src2Reg, MRI);
+    if (!C2)
+      return false;
+
+    MatchInfo = [=, &MI](MachineIRBuilder &B) {
+      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(LHSSrc1);
+      MI.getOperand(2).setReg(NewCst.getReg(0));
+      Observer.changedInstr(MI);
+    };
+  }
+  return !reassociationCanBreakAddressingModePattern(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+name:            test1_noreassoc_legal_already_new_is_illegal
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4)
+    ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store 4)
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 4777
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test2_reassoc_already_legal_new_also_legal
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4)
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store 4)
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 10
+    %4:_(s64) = G_CONSTANT i64 6
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test3_noreassoc_only_oneuse
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test3_noreassoc_only_oneuse
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4)
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %10:_(s64) = G_CONSTANT i64 4783
+    %9:_(p0) = G_PTR_ADD %0, %10(s64)
+    %7:_(s32) = G_LOAD %9(p0) :: (load 4)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test4_reassoc_existing_is_already_illegal
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4)
+    ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store 4)
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_CONSTANT i64 17
+    %4:_(s64) = G_CONSTANT i64 4079
+    %9:_(s32) = G_CONSTANT i32 0
+    %10:_(p0) = G_PTR_ADD %0, %2(s64)
+    %11:_(p0) = G_PTR_ADD %10, %4(s64)
+    %7:_(s32) = G_LOAD %11(p0) :: (load 4)
+    G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test5_add_on_rhs
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test5_add_on_rhs
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1)
+    ; CHECK: $w0 = COPY [[LOAD]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64) = G_ADD %1, %2
+    %4:_(p0) = G_PTR_ADD %0, %3(s64)
+    %7:_(s32) = G_LOAD %4(p0) :: (load 1)
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+
+...
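
Reviewer note (not part of the patch): both MatchInfo lambdas built by matchReassocPtrAdd rewrite the matched G_PTR_ADD in place through the change observer, so the instruction itself is the result of the combine. That is why the new TableGen rule applies it with applyBuildFnNoErase rather than applyBuildFn, which erases the instruction after running the lambda. The sketch below restates the constant-folding case (example 2) as a standalone function using the same helper calls as the patch; the free-function form, its parameter list, and the name matchFoldPtrAddChainSketch are invented for illustration, since inside CombinerHelper the MRI, Builder, and Observer members are used instead.

// Illustrative sketch only: G_PTR_ADD(G_PTR_ADD(Base, C1), C2) is rewritten
// in place to G_PTR_ADD(Base, C1 + C2). Because MI survives as the result,
// the paired apply step must be applyBuildFnNoErase, not applyBuildFn.
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <functional>
using namespace llvm;

static bool matchFoldPtrAddChainSketch(
    MachineInstr &MI, MachineRegisterInfo &MRI, GISelChangeObserver &Observer,
    std::function<void(MachineIRBuilder &)> &MatchInfo) {
  // Only fires on G_PTR_ADD(G_PTR_ADD(Base, C1), C2).
  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
  MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
  if (!LHS || LHS->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
  // Both offsets must be known constants, as in matchReassocPtrAdd.
  auto C1 = getConstantVRegVal(LHS->getOperand(2).getReg(), MRI);
  auto C2 = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
  if (!C1 || !C2)
    return false;

  Register Base = LHS->getOperand(1).getReg();
  LLT OffTy = MRI.getType(MI.getOperand(2).getReg());
  MatchInfo = [=, &MI, &Observer](MachineIRBuilder &B) {
    // Build the folded constant, then retarget MI's operands in place;
    // erasing MI afterwards would delete the combine's own result.
    auto NewCst = B.buildConstant(OffTy, *C1 + *C2);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Base);
    MI.getOperand(2).setReg(NewCst.getReg(0));
    Observer.changedInstr(MI);
  };
  return true;
}

The example-1 case in the patch works the same way: it additionally builds a new inner G_PTR_ADD before retargeting MI's operands, but MI itself is still kept alive, so the same no-erase apply step is used for both patterns.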