diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -631,6 +631,12 @@
   /// addressing mode usage.
   bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Try to reassociate the operands of a commutative binop.
+  bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0,
+                       Register Op1, BuildFnTy &MatchInfo);
+  /// Reassociate commutative binary operations like G_ADD.
+  bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   /// Do constant folding when opportunities are exposed after MIR building.
   bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/LowLevelTypeUtils.h"
 #include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3583,6 +3584,17 @@
     return N0.hasOneUse();
   }
 
+  // Lets the target control the following reassociation of operands:
+  // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and
+  // N1 is y. By default, any case where N0 has a single use is considered
+  // profitable; this mirrors the condition that this hook replaces in the
+  // combiner. A target can override this with its own heuristic to further
+  // restrict the combine.
+  virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+                                   Register N1) const {
+    return MRI.hasOneNonDBGUse(N0);
+  }
+
   virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
     return false;
   }
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -862,7 +862,13 @@
   [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
 
-def reassocs : GICombineGroup<[reassoc_ptradd]>;
+def reassoc_comm_binops : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ADD $root, $src1, $src2):$root,
+    [{ return Helper.matchReassocCommBinOp(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd, reassoc_comm_binops]>;
 
 // Constant fold operations.
 def constant_fold : GICombineRule<
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4466,6 +4466,58 @@
   return false;
 }
 
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+                                     Register OpLHS, Register OpRHS,
+                                     BuildFnTy &MatchInfo) {
+  LLT OpRHSTy = MRI.getType(OpRHS);
+  MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+  if (OpLHSDef->getOpcode() != Opc)
+    return false;
+
+  MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+  Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+  Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+  if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI)) {
+    if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+      // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+      };
+      return true;
+    }
+    if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS) &&
+        MRI.hasOneNonDBGUse(OpLHSLHS)) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      // iff (op x, c1) has one use
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+        B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+      };
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  // We don't check if the reassociation will break a legal addressing mode
+  // here since pointer arithmetic is handled by G_PTR_ADD.
+  unsigned Opc = MI.getOpcode();
+  Register DstReg = MI.getOperand(0).getReg();
+  Register LHSReg = MI.getOperand(1).getReg();
+  Register RHSReg = MI.getOperand(2).getReg();
+
+  if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+    return true;
+  if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+    return true;
+  return false;
+}
+
 bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
   Register Op1 = MI.getOperand(1).getReg();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -292,6 +292,9 @@
                               bool SNaN = false,
                               unsigned Depth = 0) const override;
 
+  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+                           Register N1) const override;
+
   /// Helper function that adds Reg to the LiveIn list of the DAG's
   /// MachineFunction.
   ///
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5108,6 +5108,11 @@
   }
 }
 
+bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
+                                               Register N0, Register N1) const {
+  return false; // FIXME: handle regbanks
+}
+
 TargetLowering::AtomicExpansionKind
 AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   switch (RMW->getOperation()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -483,6 +483,9 @@
   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                            SDValue N1) const override;
 
+  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+                           Register N1) const override;
+
   bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                        unsigned MaxDepth = 5) const;
   bool isCanonicalized(Register Reg, MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13436,6 +13436,11 @@
          hasMemSDNodeUser(*N0->use_begin()));
 }
 
+bool SITargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
+                                           Register N0, Register N1) const {
+  return false; // FIXME: handle regbanks
+}
+
 MachineMemOperand::Flags
 SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
   // Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-binop-reassoc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-binop-reassoc.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-binop-reassoc.mir
@@ -0,0 +1,124 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+# Combines: (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+---
+name: test1_add_move_inner_cst_to_fold
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test1_add_move_inner_cst_to_fold
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 88
+    ; CHECK-NEXT: %add_outer:_(s64) = G_ADD %x, [[C]]
+    ; CHECK-NEXT: $x0 = COPY %add_outer(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %x:_(s64) = COPY $x0
+    %C1:_(s64) = G_CONSTANT i64 64
+    %C2:_(s64) = G_CONSTANT i64 24
+    %add_inner:_(s64) = G_ADD %x, %C1
+    %add_outer:_(s64) = G_ADD %add_inner, %C2
+    $x0 = COPY %add_outer
+    RET_ReallyLR implicit $x0
+
+...
+
+# (op (op x, c1), y) -> (op (op x, y), c1)
+---
+name: test2_add_move_inner_cst_to_rhs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s64) = COPY $x0
+    ; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: %y:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %x, %y
+    ; CHECK-NEXT: %add_outer:_(s64) = G_ADD [[ADD]], %C1
+    ; CHECK-NEXT: $x0 = COPY %add_outer(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %x:_(s64) = COPY $x0
+    %C1:_(s64) = G_CONSTANT i64 64
+    %y:_(s64) = COPY $x1
+    %add_inner:_(s64) = G_ADD %x, %C1
+    %add_outer:_(s64) = G_ADD %add_inner, %y
+    $x0 = COPY %add_outer
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: test2_add_move_inner_cst_to_rhs_multiuse
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs_multiuse
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(s64) = COPY $x0
+    ; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: %y:_(s64) = COPY $x1
+    ; CHECK-NEXT: %add_inner:_(s64) = G_ADD %x, %C1
+    ; CHECK-NEXT: %add_outer:_(s64) = G_ADD %add_inner, %y
+    ; CHECK-NEXT: $x0 = COPY %add_outer(s64)
+    ; CHECK-NEXT: $x1 = COPY %add_inner(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %x:_(s64) = COPY $x0
+    %C1:_(s64) = G_CONSTANT i64 64
+    %y:_(s64) = COPY $x1
+    %add_inner:_(s64) = G_ADD %x, %C1
+    %add_outer:_(s64) = G_ADD %add_inner, %y
+    $x0 = COPY %add_outer
+    $x1 = COPY %add_inner
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: test2_add_move_inner_cst_to_rhs_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body: |
+  bb.1:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs_vector
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %x:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: %VEC_C1:_(<2 x s64>) = G_BUILD_VECTOR %C1(s64), %C1(s64)
+    ; CHECK-NEXT: %y:_(<2 x s64>) = COPY $q1
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD %x, %y
+    ; CHECK-NEXT: %add_outer:_(<2 x s64>) = G_ADD [[ADD]], %VEC_C1
+    ; CHECK-NEXT: $q0 = COPY %add_outer(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %x:_(<2 x s64>) = COPY $q0
+    %C1:_(s64) = G_CONSTANT i64 64
+    %VEC_C1:_(<2 x s64>) = G_BUILD_VECTOR %C1, %C1
+    %y:_(<2 x s64>) = COPY $q1
+    %add_inner:_(<2 x s64>) = G_ADD %x, %VEC_C1
+    %add_outer:_(<2 x s64>) = G_ADD %add_inner, %y
+    $q0 = COPY %add_outer
+    RET_ReallyLR implicit $q0
+
+...
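
Note (not part of the patch): a target that wants this GlobalISel combine but with a tighter profitability rule only needs to override the hook introduced above. Below is a minimal sketch, assuming a hypothetical MyTargetLowering class; the extra G_ADD restriction is purely illustrative and not taken from this patch.

// Illustrative sketch only, for a hypothetical MyTargetLowering. It keeps the
// default single-use requirement and then applies an arbitrary extra check.
bool MyTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
                                           Register N0, Register N1) const {
  // Keep the generic requirement that the inner (op x, c1) has one use.
  if (!MRI.hasOneNonDBGUse(N0))
    return false;
  // Example target-specific restriction: only reassociate when the inner
  // operation is a plain integer G_ADD; bail out for anything else.
  const MachineInstr *Def = MRI.getVRegDef(N0);
  return Def && Def->getOpcode() == TargetOpcode::G_ADD;
}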