diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -263,6 +263,9 @@ /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand. bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx); + /// Delete \p MI and replace all of its uses with \p Replacement. + bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement); + /// Return true if \p MOP1 and \p MOP2 are register operands are defined by /// equivalent instructions. bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2); @@ -303,6 +306,13 @@ std::tuple &MatchInfo); bool applyAshShlToSextInreg(MachineInstr &MI, std::tuple &MatchInfo); + /// \return true if \p MI is a G_AND instruction whose RHS is a mask where + /// LHS & mask == LHS. (E.g., an all-ones value.) + /// + /// \param [in] MI - The G_AND instruction. + /// \param [out] Reg - A register the G_AND should be replaced with on + /// success. + bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -299,6 +299,14 @@ [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]), (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}]) >; +// Fold (x & mask) -> x when (x & mask) is known to equal x. +def and_trivial_mask_matchinfo : GIDefMatchData<"Register">; +def and_trivial_mask: GICombineRule < + (defs root:$root, and_trivial_mask_matchinfo:$matchinfo), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) +>; // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1766,6 +1766,16 @@ return true; } +bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, + Register Replacement) { + assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); + Register OldReg = MI.getOperand(0).getReg(); + assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); + MI.eraseFromParent(); + replaceRegWith(MRI, OldReg, Replacement); + return true; +} + bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Match (cond ? x : x) @@ -1979,6 +1989,52 @@ return true; } +bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI, + Register &Replacement) { + // Given + // + // %mask:_(sN) = G_CONSTANT iN 000...0111...1 + // %x:_(sN) = G_SOMETHING + // %y:_(sN) = G_AND %x, %mask + // + // Eliminate the G_AND when it is known that x & mask == x. + // + // Patterns like this can appear as a result of legalization. E.g. + // + // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y + // %one:_(s32) = G_CONSTANT i32 1 + // %and:_(s32) = G_AND %cmp, %one + // + // In this case, G_ICMP only produces a single bit, so x & 1 == x. + assert(MI.getOpcode() == TargetOpcode::G_AND); + if (!KB) + return false; + + // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the + // LHS of the G_AND. + Replacement = MI.getOperand(1).getReg(); + Register AndDst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(AndDst); + + // FIXME: This should be removed once GISelKnownBits supports vectors. + if (DstTy.isVector()) + return false; + if (!canReplaceReg(AndDst, Replacement, MRI)) + return false; + + // Check that we have a constant on the RHS of the G_AND, which is of the form + // 000...0111...1. + int64_t Cst; + if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst))) + return false; + APInt Mask(DstTy.getSizeInBits(), Cst); + if (!Mask.isMask()) + return false; + + // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0. + return KB->maskedValueIsZero(Replacement, ~Mask); +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -80,6 +80,7 @@ : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", [copy_prop, erase_undef_store, combines_for_extload, sext_trunc_sextload, shuffle_vector_pseudos, - hoist_logic_op_with_same_opcode_hands]> { + hoist_logic_op_with_same_opcode_hands, + and_trivial_mask]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir @@ -0,0 +1,222 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x. +# +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +name: remove_and_with_one_bit +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; G_ICMP produces a single bit. The mask is 1. + ; + ; cmp = 000...0? + ; mask = 000...01 + ; cmp & mask = 000...0? + ; + ; Remove the G_AND. + ; + ; CHECK-LABEL: name: remove_and_with_one_bit + ; CHECK: liveins: $w0, $w1 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %y:_(s32) = COPY $w1 + ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y + ; CHECK: $w0 = COPY %cmp(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y + %mask:_(s32) = G_CONSTANT i32 1 + %and:_(s32) = G_AND %cmp(s32), %mask + $w0 = COPY %and(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: remove_and_all_ones_mask +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; -1 is all ones. Therefore z & -1 = z. Remove the G_AND. + ; + ; CHECK-LABEL: name: remove_and_all_ones_mask + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = COPY $w2 + ; CHECK: $w0 = COPY %z(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %mask:_(s32) = G_CONSTANT i32 -1 + %and:_(s32) = G_AND %z(s32), %mask + $w0 = COPY %and(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: remove_and_all_ones_zext +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; %z is a s32, so it can be at most the all-ones value on 32 bits. + ; In decimal this is 4294967295. Any zero-extension of %z is at most this + ; value. + ; + ; Therefore, zext(z) & 4294967295 == z. Remove the G_AND. + ; + ; CHECK-LABEL: name: remove_and_all_ones_zext + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = COPY $w2 + ; CHECK: %ext:_(s64) = G_ZEXT %z(s32) + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %ext:_(s64) = G_ZEXT %z + %mask:_(s64) = G_CONSTANT i64 4294967295 + %and:_(s64) = G_AND %ext(s64), %mask + $x0 = COPY %and(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: remove_and_all_ones_anyext +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; This is the same as the zext case. + ; + ; CHECK-LABEL: name: remove_and_all_ones_anyext + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = COPY $w2 + ; CHECK: %ext:_(s64) = G_ZEXT %z(s32) + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %ext:_(s64) = G_ZEXT %z + %mask:_(s64) = G_CONSTANT i64 4294967295 + %and:_(s64) = G_AND %ext(s64), %mask + $x0 = COPY %and(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: dont_remove_all_ones_sext +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; We don't know if the sign bit is set on %z. So, the value in %ext may have + ; higher bits set than 4294967295. + ; + ; CHECK-LABEL: name: dont_remove_all_ones_sext + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = COPY $w2 + ; CHECK: %ext:_(s64) = G_SEXT %z(s32) + ; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: %and:_(s64) = G_AND %ext, %mask + ; CHECK: $x0 = COPY %and(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %ext:_(s64) = G_SEXT %z + %mask:_(s64) = G_CONSTANT i64 4294967295 + %and:_(s64) = G_AND %ext(s64), %mask + $x0 = COPY %and(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: remove_and_positive_constant_sext +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; We know the sign bit is not set on %z. Therefore, + ; + ; z = ext = 42 = 000...0101010 + ; mask = 0000...0111111 + ; + ; So z & mask == z + ; CHECK-LABEL: name: remove_and_positive_constant_sext + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = G_CONSTANT i32 42 + ; CHECK: %ext:_(s64) = G_SEXT %z(s32) + ; CHECK: $x0 = COPY %ext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = G_CONSTANT i32 42 + %ext:_(s64) = G_SEXT %z + %mask:_(s64) = G_CONSTANT i64 63 + %and:_(s64) = G_AND %ext(s64), %mask + $x0 = COPY %and(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: not_a_mask +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; 6 is not a mask, so we should still have the G_AND. + ; + ; CHECK-LABEL: name: not_a_mask + ; CHECK: liveins: $w0, $w1 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %y:_(s32) = COPY $w1 + ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y + ; CHECK: %mask:_(s32) = G_CONSTANT i32 6 + ; CHECK: %and:_(s32) = G_AND %cmp, %mask + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y + %mask:_(s32) = G_CONSTANT i32 6 + %and:_(s32) = G_AND %cmp(s32), %mask + $w0 = COPY %and(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: unknown_val +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; We don't know what's in $w2, so we can't remove the G_AND without a mask + ; that fills every bit in the type. + ; + ; CHECK-LABEL: name: unknown_val + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %z:_(s32) = COPY $w2 + ; CHECK: %one:_(s32) = G_CONSTANT i32 32 + ; CHECK: %and:_(s32) = G_AND %z, %one + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %z:_(s32) = COPY $w2 + %one:_(s32) = G_CONSTANT i32 32 + %and:_(s32) = G_AND %z(s32), %one + $w0 = COPY %and(s32) + RET_ReallyLR implicit $w0 +...