Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -408,6 +408,14 @@
   bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
   bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
 
+  /// Fold (xor (and x, y), y) -> (and (not x), y)
+  ///{
+  bool matchXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  bool applyXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  ///}
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -471,6 +471,16 @@
   (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
 >;
 
+// Fold (xor (and x, y), y) -> (and (not x), y)
+def xor_of_and_with_same_reg_matchinfo :
+    GIDefMatchData<"std::pair<Register, Register>">;
+def xor_of_and_with_same_reg: GICombineRule <
+  (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_XOR):$root,
+         [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -505,4 +515,4 @@
     known_bits_simplifications, ext_ext_fold,
     not_cmp_fold, opt_brcond_by_inverting_cond,
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
-    unmerge_zext_to_zext, trunc_ext_fold, trunc_shl]>;
+    unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, xor_of_and_with_same_reg]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2658,6 +2658,40 @@
   return true;
 }
 
+bool CombinerHelper::matchXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Match (xor (and x, y), y) (or any of its commuted cases)
+  assert(MI.getOpcode() == TargetOpcode::G_XOR);
+  Register &X = MatchInfo.first;
+  Register &Y = MatchInfo.second;
+  Register AndReg = MI.getOperand(1).getReg();
+  Register SharedReg = MI.getOperand(2).getReg();
+  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+    std::swap(AndReg, SharedReg);
+    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+      return false;
+  }
+
+  // Only do this if we'll eliminate the G_AND.
+  if (!MRI.hasOneNonDBGUse(AndReg))
+    return false;
+
+  if (Y != SharedReg)
+    std::swap(X, Y);
+  return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Fold (xor (and x, y), y) -> (and (not x), y)
+  Builder.setInstrAndDebugLoc(MI);
+  Register X, Y;
+  std::tie(X, Y) = MatchInfo;
+  Builder.buildAnd(MI.getOperand(0), Builder.buildNot(MRI.getType(X), X), Y);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -80,6 +80,6 @@
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload, shuffle_vector_pseudos,
                         hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask]> {
+                        and_trivial_mask, xor_of_and_with_same_reg]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: fold_scalar
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and x, y), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_scalar
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_vector
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; Vector edition
+    ; CHECK-LABEL: name: fold_vector
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %x:_(<2 x s32>) = COPY $x0
+    ; CHECK: %y:_(<2 x s32>) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR %x, [[BUILD_VECTOR]]
+    ; CHECK: %xor:_(<2 x s32>) = G_AND [[XOR]], %y
+    ; CHECK: $x0 = COPY %xor(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(<2 x s32>) = COPY $x0
+    %y:_(<2 x s32>) = COPY $x1
+    %and:_(<2 x s32>) = G_AND %x, %y
+    %xor:_(<2 x s32>) = G_XOR %and, %y
+    $x0 = COPY %xor(<2 x s32>)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: fold_commuted_and
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and y, x), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_commuted_xor
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and x, y)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: fold_commuted_xor_and
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and y, x)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_different_regs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; The G_AND does not share any registers with the G_XOR
+    ; CHECK-LABEL: name: dont_fold_different_regs
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_more_than_one_use
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; Don't fold when the G_AND is used outside the G_XOR.
+    ;
+    ; CHECK-LABEL: name: dont_fold_more_than_one_use
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: %add:_(s32) = G_ADD %and, %xor
+    ; CHECK: $w0 = COPY %add(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    %add:_(s32) = G_ADD %and, %xor
+    $w0 = COPY %add(s32)
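
Illustrative aside, not part of the patch: the combine rests on the Boolean identity (x & y) ^ y == (~x) & y, which the tests above exercise in scalar and vector form. The standalone C++ sketch below (the file name check_xor_and_identity.cpp is hypothetical) exhaustively verifies the identity over all 8-bit values; because the identity is bit-parallel, it extends to any scalar or vector width. Compile with, e.g., clang++ check_xor_and_identity.cpp && ./a.out.

// check_xor_and_identity.cpp (hypothetical helper, not part of the patch):
// exhaustively verify (x & y) ^ y == (~x) & y over all 8-bit values. The
// identity is bit-parallel, so it holds at any width.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 256; ++Y) {
      std::uint8_t Lhs = static_cast<std::uint8_t>((X & Y) ^ Y);
      std::uint8_t Rhs = static_cast<std::uint8_t>(~X & Y);
      assert(Lhs == Rhs && "identity must hold for every bit pattern");
    }
  }
  std::puts("(x & y) ^ y == (~x) & y holds for all 8-bit x, y");
  return 0;
}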