diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -435,6 +435,11 @@
                               std::tuple<Register, int64_t> &MatchInfo);
   bool applyAshShlToSextInreg(MachineInstr &MI,
                               std::tuple<Register, int64_t> &MatchInfo);
+
+  /// Match and(and(x, C1), C2) -> and(x, C1 & C2), or 0 if C1 & C2 == 0.
+  bool matchOverlappingAnd(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// \return true if \p MI is a G_AND instruction whose operands are x and y
   /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
   ///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -409,6 +409,12 @@
          [{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]),
   (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
 >;
+def overlapping_and: GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchOverlappingAnd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])
+>;
 
 // Fold (x | y) -> x or (x | y) -> y when (x | y) is known to equal x or equal y.
 def redundant_or: GICombineRule <
@@ -641,7 +647,7 @@
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits]>;
+  zext_trunc_fold, icmp_to_true_false_known_bits, overlapping_and]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2997,6 +2997,40 @@
   return true;
 }
 
+/// Fold and(and(x, C1), C2) into a single G_AND with the combined mask, or
+/// into the constant 0 when the two masks share no set bits:
+///   and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+bool CombinerHelper::matchOverlappingAnd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+
+  // FIXME: This should be removed once GISelKnownBits supports vectors.
+  if (Ty.isVector())
+    return false;
+
+  Register R;
+  int64_t C1;
+  int64_t C2;
+  if (!mi_match(
+          Dst, MRI,
+          m_GAnd(m_OneNonDBGUse(m_GAnd(m_Reg(R), m_ICst(C1))), m_ICst(C2))))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (C1 & C2) {
+      B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
+    } else {
+      // Disjoint masks: the result is always 0, so define Dst directly as
+      // the constant instead of routing it through an extra COPY.
+      B.buildConstant(Dst, 0);
+    }
+  };
+  return true;
+}
+
 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
                                        Register &Replacement) {
   // Given
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.ll b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -o - -verify-machineinstrs -global-isel=1 | FileCheck %s
+; RUN: llc < %s -o - -verify-machineinstrs -global-isel=0 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+define i32 @bitmask_overlap1(i32 %x) nounwind {
+; CHECK-LABEL: bitmask_overlap1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w0, w0, #0x80
+; CHECK-NEXT:    ret
+  %y = and i32 %x, 4294967168
+  %z = and i32 %y, 255
+  ret i32 %z
+}
+
+define i32 @bitmask_overlap2(i32 %x) nounwind {
+; CHECK-LABEL: bitmask_overlap2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w0, w0, #0x80
+; CHECK-NEXT:    ret
+  %y = and i32 255, %x
+  %z = and i32 %y, 4294967168
+  ret i32 %z
+}
+
+define i32 @bitmask_overlap3(i32 %x) nounwind {
+; CHECK-LABEL: bitmask_overlap3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w0, w0, #0x80
+; CHECK-NEXT:    ret
+  %y = and i32 255, %x
+  %z = and i32 4294967168, %y
+  ret i32 %z
+}
+
+define i32 @bitmask_overlap4(i32 %x) nounwind {
+; CHECK-LABEL: bitmask_overlap4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w0, w0, #0x80
+; CHECK-NEXT:    ret
+  %y = and i32 %x, 255
+  %z = and i32 4294967168, %y
+  ret i32 %z
+}
+
+define i32 @bitmask_no_overlap(i32 %x) nounwind {
+; CHECK-LABEL: bitmask_no_overlap:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %y = and i32 %x, 1
+  %z = and i32 %y, 2
+  ret i32 %z
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -547,21 +547,18 @@
 ;
 ; GFX8-LABEL: s_shl_i32_zext_i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i32_zext_i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_shl_i32_zext_i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX10-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-NEXT:    ; return to shader part epilog