diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -756,6 +756,13 @@ bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo); bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + /// Transform: + /// (x + y) - y -> x + /// (x + y) - x -> y + /// x - (y + x) -> 0 - y + /// x - (x + z) -> 0 - z + bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo); + /// \returns true if it is possible to simplify a select instruction \p MI /// to a min/max instruction of some sort. bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -950,6 +950,17 @@ [{ return Helper.matchTruncBuildVectorFold(*${op}, ${matchinfo}); }]), (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${matchinfo}); }])>; +// Transform: +// (x + y) - y -> x +// (x + y) - x -> y +// x - (y + x) -> 0 - y +// x - (x + z) -> 0 - z +def sub_add_reg: GICombineRule < + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SUB):$root, + [{ return Helper.matchSubAddSameReg(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def select_to_minmax: GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_SELECT):$root, @@ -1019,7 +1030,8 @@ truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask, fma_combines, fold_binop_into_select]>; + and_or_disjoint_mask, fma_combines, fold_binop_into_select, + sub_add_reg]>; // A 
combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4825,6 +4825,50 @@ return true; } +bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SUB); + Register Dst = MI.getOperand(0).getReg(); + // (x + y) - z -> x (if y == z) + // (x + y) - z -> y (if x == z) + Register X, Y, Z; + if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) { + Register ReplaceReg; + int64_t CstX, CstY; + if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstY)))) + ReplaceReg = X; + else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Y; + if (ReplaceReg) { + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); }; + return true; + } + } + + // x - (y + z) -> 0 - y (if x == z) + // x - (y + z) -> 0 - z (if x == y) + if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) { + Register ReplaceReg; + int64_t CstX; + if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Z, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Y; + else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) && + mi_match(Y, MRI, m_SpecificICstOrSplat(CstX)))) + ReplaceReg = Z; + if (ReplaceReg) { + MatchInfo = [=](MachineIRBuilder &B) { + auto Zero = B.buildConstant(MRI.getType(Dst), 0); + B.buildSub(Dst, Zero, ReplaceReg); + }; + return true; + } + } + return false; +} + MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UDIV); auto &UDiv = cast<GenericMachineInstr>(MI); diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir @@ -0,0 +1,364 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +... +--- +name: simplify_to_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; (x + y) - y -> x + ; CHECK-LABEL: name: simplify_to_x + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY %x(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x, %y + %sub:_(s32) = G_SUB %add, %y + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: simplify_to_y +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; (x + y) - x -> y + ; CHECK-LABEL: name: simplify_to_y + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY %y(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x, %y + %sub:_(s32) = G_SUB %add, %x + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: simplify_to_constant_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; (x + 1) - 1 -> x + ; CHECK-LABEL: name: simplify_to_constant_x + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY %x(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y1:_(s32) = G_CONSTANT i32 1 + %y2:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_ADD %x, %y1 + %sub:_(s32) = G_SUB %add, %y2 + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: simplify_to_constant_y +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; (x + y) - x -> y + ; CHECK-LABEL: name: simplify_to_constant_y + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY %y(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x1:_(s32) = G_CONSTANT i32 1 + %x2:_(s32) = G_CONSTANT i32 1 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x1, %y + %sub:_(s32) = G_SUB %add, %x2 + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: vector_simplify_to_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; (x + y) - y -> x + ; CHECK-LABEL: name: vector_simplify_to_x + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: $d0 = COPY %x(<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %x:_(<2 x s32>) = COPY $d0 + %y:_(<2 x s32>) = COPY $d1 + %add:_(<2 x s32>) = G_ADD %x, %y + %sub:_(<2 x s32>) = G_SUB %add, %y + $d0 = COPY %sub(<2 x s32>) + RET_ReallyLR implicit $d0 +... +--- +name: splat_simplify_to_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; (x + 1) - 1 -> x + ; CHECK-LABEL: name: splat_simplify_to_x + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: $d0 = COPY %x(<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %x:_(<2 x s32>) = COPY $d0 + %cst:_(s32) = G_CONSTANT i32 1 + %y1:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %y2:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %add:_(<2 x s32>) = G_ADD %x, %y1 + %sub:_(<2 x s32>) = G_SUB %add, %y2 + $d0 = COPY %sub(<2 x s32>) + RET_ReallyLR implicit $d0 +... 
+--- +name: unique_registers_no_fold +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; CHECK-LABEL: name: unique_registers_no_fold + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %z:_(s32) = COPY $w2 + ; CHECK-NEXT: %add:_(s32) = G_ADD %y, %x + ; CHECK-NEXT: %sub:_(s32) = G_SUB %add, %z + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %add:_(s32) = G_ADD %y, %x + %sub:_(s32) = G_SUB %add, %z + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: unique_constants_no_fold +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; (x + y) - x -> y + ; CHECK-LABEL: name: unique_constants_no_fold + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x1:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %x2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %add:_(s32) = G_ADD %x1, %y + ; CHECK-NEXT: %sub:_(s32) = G_SUB %add, %x2 + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x1:_(s32) = G_CONSTANT i32 1 + %x2:_(s32) = G_CONSTANT i32 2 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x1, %y + %sub:_(s32) = G_SUB %add, %x2 + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: simplify_to_neg_y +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; x - (y + x) -> 0 - y + ; CHECK-LABEL: name: simplify_to_neg_y + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %sub:_(s32) = G_SUB [[C]], %y + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x, %y + %sub:_(s32) = G_SUB %x, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: simplify_to_neg_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; y - (y + x) -> 0 - x + ; CHECK-LABEL: name: simplify_to_neg_x + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %sub:_(s32) = G_SUB [[C]], %x + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x, %y + %sub:_(s32) = G_SUB %y, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: simplify_to_neg_y_constant +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; x - (y + x) -> 0 - y + ; CHECK-LABEL: name: simplify_to_neg_y_constant + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %sub:_(s32) = G_SUB [[C]], %y + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x1:_(s32) = G_CONSTANT i32 1 + %x2:_(s32) = G_CONSTANT i32 1 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x1, %y + %sub:_(s32) = G_SUB %x2, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: simplify_to_neg_x_constant +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; y - (y + x) -> 0 - x + ; CHECK-LABEL: name: simplify_to_neg_x_constant + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %sub:_(s32) = G_SUB [[C]], %x + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y1:_(s32) = G_CONSTANT i32 1 + %y2:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_ADD %x, %y1 + %sub:_(s32) = G_SUB %y2, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: vector_simplify_to_neg_x +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; y - (y + x) -> 0 - x + ; CHECK-LABEL: name: vector_simplify_to_neg_x + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: %sub:_(<2 x s32>) = G_SUB [[BUILD_VECTOR]], %x + ; CHECK-NEXT: $d0 = COPY %sub(<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %x:_(<2 x s32>) = COPY $d0 + %y:_(<2 x s32>) = COPY $d1 + %add:_(<2 x s32>) = G_ADD %x, %y + %sub:_(<2 x s32>) = G_SUB %y, %add + $d0 = COPY %sub(<2 x s32>) + RET_ReallyLR implicit $d0 +... +--- +name: vector_simplify_to_neg_y_constant +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; x - (y + x) -> 0 - y + ; CHECK-LABEL: name: vector_simplify_to_neg_y_constant + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %y:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: %sub:_(<2 x s32>) = G_SUB [[BUILD_VECTOR]], %y + ; CHECK-NEXT: $d0 = COPY %sub(<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %cst:_(s32) = G_CONSTANT i32 1 + %x1:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %x2:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %y:_(<2 x s32>) = COPY $d1 + %add:_(<2 x s32>) = G_ADD %x1, %y + %sub:_(<2 x s32>) = G_SUB %x2, %add + $d0 = COPY %sub(<2 x s32>) + RET_ReallyLR implicit $d0 +... 
+--- +name: unique_registers_neg_no_fold +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; y - (y + x) -> 0 - x + ; CHECK-LABEL: name: unique_registers_neg_no_fold + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %z:_(s32) = COPY $w2 + ; CHECK-NEXT: %add:_(s32) = G_ADD %x, %y + ; CHECK-NEXT: %sub:_(s32) = G_SUB %z, %add + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %add:_(s32) = G_ADD %x, %y + %sub:_(s32) = G_SUB %z, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 +... +--- +name: wrong_constant_neg_no_fold +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; x - (y + x) -> 0 - y + ; CHECK-LABEL: name: wrong_constant_neg_no_fold + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x1:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %x2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %add:_(s32) = G_ADD %x1, %y + ; CHECK-NEXT: %sub:_(s32) = G_SUB %x2, %add + ; CHECK-NEXT: $w0 = COPY %sub(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x1:_(s32) = G_CONSTANT i32 1 + %x2:_(s32) = G_CONSTANT i32 2 + %y:_(s32) = COPY $w1 + %add:_(s32) = G_ADD %x1, %y + %sub:_(s32) = G_SUB %x2, %add + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0