diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -415,6 +415,14 @@
   bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
   bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
 
+  /// Fold (xor (and x, y), y) -> (and (not x), y)
+  ///{
+  bool matchXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  bool applyXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  ///}
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -480,6 +480,16 @@
   (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
 >;
 
+// Fold (xor (and x, y), y) -> (and (not x), y)
+def xor_of_and_with_same_reg_matchinfo :
+    GIDefMatchData<"std::pair<Register, Register>">;
+def xor_of_and_with_same_reg: GICombineRule <
+  (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_XOR):$root,
+         [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -515,4 +525,4 @@
     not_cmp_fold, opt_brcond_by_inverting_cond,
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
-    constant_fp_op]>;
+    constant_fp_op, xor_of_and_with_same_reg]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2720,6 +2720,52 @@
   return true;
 }
 
+bool CombinerHelper::matchXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Match (xor (and x, y), y) (or any of its commuted cases)
+  assert(MI.getOpcode() == TargetOpcode::G_XOR);
+  Register &X = MatchInfo.first;
+  Register &Y = MatchInfo.second;
+  Register AndReg = MI.getOperand(1).getReg();
+  Register SharedReg = MI.getOperand(2).getReg();
+
+  // Find a G_AND on either side of the G_XOR.
+  // Look for one of
+  //
+  // (xor (and x, y), SharedReg)
+  // (xor SharedReg, (and x, y))
+  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+    std::swap(AndReg, SharedReg);
+    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+      return false;
+  }
+
+  // Only do this if we'll eliminate the G_AND.
+  if (!MRI.hasOneNonDBGUse(AndReg))
+    return false;
+
+  // We can combine if SharedReg is the same as either the LHS or RHS of the
+  // G_AND.
+  if (Y != SharedReg)
+    std::swap(X, Y);
+  return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Fold (xor (and x, y), y) -> (and (not x), y)
+  Builder.setInstrAndDebugLoc(MI);
+  Register X, Y;
+  std::tie(X, Y) = MatchInfo;
+  auto Not = Builder.buildNot(MRI.getType(X), X);
+  Observer.changingInstr(MI);
+  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+  MI.getOperand(2).setReg(Y);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -89,6 +89,6 @@
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload, shuffle_vector_pseudos,
                         hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask, vashr_vlshr_imm]> {
+                        and_trivial_mask, vashr_vlshr_imm, xor_of_and_with_same_reg]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: fold_scalar
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and x, y), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_scalar
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_vector
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; Vector edition
+    ; CHECK-LABEL: name: fold_vector
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %x:_(<2 x s32>) = COPY $x0
+    ; CHECK: %y:_(<2 x s32>) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR %x, [[BUILD_VECTOR]]
+    ; CHECK: %xor:_(<2 x s32>) = G_AND [[XOR]], %y
+    ; CHECK: $x0 = COPY %xor(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(<2 x s32>) = COPY $x0
+    %y:_(<2 x s32>) = COPY $x1
+    %and:_(<2 x s32>) = G_AND %x, %y
+    %xor:_(<2 x s32>) = G_XOR %and, %y
+    $x0 = COPY %xor(<2 x s32>)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: fold_commuted_and
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and y, x), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_commuted_xor
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and x, y)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_commuted_xor_and
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and y, x)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_different_regs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; The G_AND does not share any registers with the G_XOR
+    ; CHECK-LABEL: name: dont_fold_different_regs
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_more_than_one_use
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; Don't fold when the G_AND is used outside the G_XOR.
+    ;
+    ; CHECK-LABEL: name: dont_fold_more_than_one_use
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: %add:_(s32) = G_ADD %and, %xor
+    ; CHECK: $w0 = COPY %add(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    %add:_(s32) = G_ADD %and, %xor
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
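
For reference, the fold depends on the bitwise identity (x & y) ^ y == ~x & y, which holds per bit: where y is 0 both sides are 0, and where y is 1 the left side reduces to x ^ 1, i.e. the inverted bit of x. Below is a minimal standalone C++ sanity check of that identity; it is illustrative only and not part of the patch.

// Exhaustively verify (x & y) ^ y == ~x & y over all 8-bit values. The
// identity is purely bitwise, so it extends to any scalar width and to
// vectors element-by-element, matching the scalar and vector MIR tests.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 256; ++Y) {
      uint8_t Original = static_cast<uint8_t>((X & Y) ^ Y); // (xor (and x, y), y)
      uint8_t Combined = static_cast<uint8_t>(~X & Y);      // (and (not x), y)
      assert(Original == Combined);
    }
  }
  return 0;
}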