Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -234,6 +234,9 @@
   /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
   bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
 
+  /// Delete \p MI and replace all of its uses with \p Replacement.
+  bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+
   /// Return true if \p MOP1 and \p MOP2 are register operands are defined by
   /// equivalent instructions.
   bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
@@ -260,6 +263,14 @@
   bool applySimplifyAddToSub(MachineInstr &MI,
                              std::tuple<Register, Register> &MatchInfo);
 
+  /// \return true if \p MI is a G_AND instruction whose RHS is a mask where
+  /// LHS & mask == LHS. (E.g., an all-ones value.)
+  ///
+  /// \param [in] MI - The G_AND instruction.
+  /// \param [out] Replacement - A register the G_AND should be replaced with
+  /// on success.
+  bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -275,6 +275,15 @@
   (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }])
 >;
 
+// Fold (x & mask) -> x when (x & mask) is known to equal x.
+def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
+def and_trivial_mask: GICombineRule <
+  (defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1700,6 +1700,16 @@
   return true;
 }
 
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+                                                 Register Replacement) {
+  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+  Register OldReg = MI.getOperand(0).getReg();
+  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+  MI.eraseFromParent();
+  replaceRegWith(MRI, OldReg, Replacement);
+  return true;
+}
+
 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
   // Match (cond ? x : x)
@@ -1776,6 +1786,50 @@
   return true;
 }
 
+bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
+                                             Register &Replacement) {
+  // Given
+  //
+  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
+  // %x:_(sN) = G_SOMETHING
+  // %y:_(sN) = G_AND %x, %mask
+  //
+  // Eliminate the G_AND when it is known that x & mask == x.
+  //
+  // Patterns like this can appear as a result of legalization. E.g.
+  //
+  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+  // %one:_(s32) = G_CONSTANT i32 1
+  // %and:_(s32) = G_AND %cmp, %one
+  //
+  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  if (!KB)
+    return false;
+
+  // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
+  // LHS of the G_AND.
+  Replacement = MI.getOperand(1).getReg();
+  Register AndDst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(AndDst);
+  if (DstTy.isVector())
+    return false;
+  if (!canReplaceReg(AndDst, Replacement, MRI))
+    return false;
+
+  // Check that we have a constant on the RHS of the G_AND, which is of the form
+  // 000...0111...1.
+  int64_t Cst;
+  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
+    return false;
+  APInt Mask(DstTy.getSizeInBits(), Cst);
+  if (!Mask.isMask())
+    return false;
+
+  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
+  return KB->maskedValueIsZero(Replacement, ~Mask);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -79,6 +79,7 @@
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                        [erase_undef_store, combines_for_extload,
-                        sext_trunc_sextload, shuffle_vector_pseudos]> {
+                        sext_trunc_sextload, shuffle_vector_pseudos,
+                        and_trivial_mask]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir
@@ -0,0 +1,222 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x.
+#
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+name: remove_and_with_one_bit
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; G_ICMP produces a single bit. The mask is 1.
+    ;
+    ; cmp = 000...0?
+    ; mask = 000...01
+    ; cmp & mask = 000...0?
+    ;
+    ; Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_with_one_bit
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    ; CHECK: $w0 = COPY %cmp(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    %mask:_(s32) = G_CONSTANT i32 1
+    %and:_(s32) = G_AND %cmp(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: remove_and_all_ones_mask
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; -1 is all ones. Therefore z & -1 = z. Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_mask
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: $w0 = COPY %z(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %mask:_(s32) = G_CONSTANT i32 -1
+    %and:_(s32) = G_AND %z(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: remove_and_all_ones_zext
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; %z is an s32, so it can be at most the all-ones value on 32 bits.
+    ; In decimal this is 4294967295. Any zero-extension of %z is at most this
+    ; value.
+    ;
+    ; Therefore, zext(z) & 4294967295 == zext(z). Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_zext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_ZEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: remove_and_all_ones_anyext
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; This is the same as the zext case.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_anyext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_ANYEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_ANYEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_remove_all_ones_sext
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We don't know if the sign bit is set on %z. So, the value in %ext may have
+    ; higher bits set than 4294967295.
+    ;
+    ; CHECK-LABEL: name: dont_remove_all_ones_sext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
+    ; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: %and:_(s64) = G_AND %ext, %mask
+    ; CHECK: $x0 = COPY %and(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_SEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: remove_and_positive_constant_sext
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We know the sign bit is not set on %z. Therefore,
+    ;
+    ; z = ext = 42 = 000...0101010
+    ; mask          = 000...0111111
+    ;
+    ; So z & mask == z. Remove the G_AND.
+    ; CHECK-LABEL: name: remove_and_positive_constant_sext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = G_CONSTANT i32 42
+    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = G_CONSTANT i32 42
+    %ext:_(s64) = G_SEXT %z
+    %mask:_(s64) = G_CONSTANT i64 63
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: not_a_mask
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; 6 is not a mask, so we should still have the G_AND.
+    ;
+    ; CHECK-LABEL: name: not_a_mask
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    ; CHECK: %mask:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %and:_(s32) = G_AND %cmp, %mask
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    %mask:_(s32) = G_CONSTANT i32 6
+    %and:_(s32) = G_AND %cmp(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: unknown_val
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We don't know what's in $w2, so we can't remove the G_AND without a mask
+    ; that fills every bit in the type.
+    ;
+    ; CHECK-LABEL: name: unknown_val
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+    ; CHECK: %and:_(s32) = G_AND %z, %one
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %one:_(s32) = G_CONSTANT i32 1
+    %and:_(s32) = G_AND %z(s32), %one
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+...