diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -435,6 +435,11 @@
                                std::tuple<Register, int64_t> &MatchInfo);
   bool applyAshShlToSextInreg(MachineInstr &MI,
                               std::tuple<Register, int64_t> &MatchInfo);
+
+  /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+  bool matchOverlappingAnd(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// \return true if \p MI is a G_AND instruction whose operands are x and y
   /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
   ///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -402,6 +402,15 @@
          [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
   (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
 >;
+
+// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+def overlapping_and: GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchOverlappingAnd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 // Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y.
def redundant_and: GICombineRule < (defs root:$root, register_matchinfo:$matchinfo), @@ -637,7 +646,8 @@ i2p_to_p2i, anyext_trunc_fold, fneg_fneg_fold, right_identity_one]>; -def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>; +def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p, + overlapping_and]>; def known_bits_simplifications : GICombineGroup<[ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2997,6 +2997,33 @@ return true; } +/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 +bool CombinerHelper::matchOverlappingAnd( + MachineInstr &MI, std::function &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_AND); + + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + + Register R; + int64_t C1; + int64_t C2; + if (!mi_match( + Dst, MRI, + m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2)))) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + if (C1 & C2) { + B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2)); + return; + } + auto Zero = B.buildConstant(Ty, 0); + replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg()); + }; + return true; +} + bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, Register &Replacement) { // Given diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir @@ -0,0 +1,270 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -debugify-and-strip-all-safe -mtriple arm64-apple-ios -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="overlapping_and" -global-isel 
-verify-machineinstrs %s -o - | FileCheck %s +--- | + ; ModuleID = '' + source_filename = "" + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios" + + ; Function Attrs: nounwind + define i32 @bitmask_overlap1(i32 %x) #0 { + %y = and i32 %x, -128 + %z = and i32 %y, 255 + ret i32 %z + } + + ; Function Attrs: nounwind + define i32 @bitmask_overlap2(i32 %x) #0 { + %y = and i32 255, %x + %z = and i32 %y, -128 + ret i32 %z + } + + ; Function Attrs: nounwind + define i32 @bitmask_overlap3(i32 %x) #0 { + %y = and i32 255, %x + %z = and i32 -128, %y + ret i32 %z + } + + ; Function Attrs: nounwind + define i32 @bitmask_overlap4(i32 %x) #0 { + %y = and i32 %x, 255 + %z = and i32 -128, %y + ret i32 %z + } + + ; Function Attrs: nounwind + define i32 @bitmask_no_overlap(i32 %x) #0 { + %y = and i32 %x, 1 + %z = and i32 %y, 2 + ret i32 %z + } + + ; Function Attrs: nounwind + define i32 @bitmask_overlap_extrause(i32 %x, i32* %p) #0 { + %y = and i32 %x, 255 + %z = and i32 %y, -128 + store i32 %y, i32* %p, align 4 + ret i32 %z + } + + attributes #0 = { nounwind } + +... +--- +name: bitmask_overlap1 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $w0 + + ; CHECK-LABEL: name: bitmask_overlap1 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = G_CONSTANT i32 -128 + %3:_(s32) = G_CONSTANT i32 255 + %2:_(s32) = G_AND %0, %1 + %4:_(s32) = G_AND %2, %3 + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... 
+---
+name: bitmask_overlap2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: bitmask_overlap2
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %1, %0
+    %4:_(s32) = G_AND %2, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap3
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: bitmask_overlap3
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %1, %0
+    %4:_(s32) = G_AND %3, %2
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap4
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: bitmask_overlap4
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_AND %3, %2
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_no_overlap
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: bitmask_no_overlap
+    ; CHECK: liveins: $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: $w0 = COPY [[C]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 2
+    %2:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_AND %2, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap_extrause
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0, $x1
+
+    ; CHECK-LABEL: name: bitmask_overlap_extrause
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: G_STORE [[AND]](s32), [[COPY1]](p0) :: (store 4 into %ir.p)
+    ; CHECK: $w0 = COPY [[AND1]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = G_CONSTANT i32 255
+    %4:_(s32) = G_CONSTANT i32 -128
+    %3:_(s32) = G_AND %0, %2
+    %5:_(s32) = G_AND %3, %4
+    G_STORE %3(s32), %1(p0) :: (store 4 into %ir.p)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -547,21 +547,18 @@
 ;
 ; GFX8-LABEL: s_shl_i32_zext_i16:
 ; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT: s_and_b32 s0, s0, 0x3fff
 ; GFX8-NEXT: s_lshl_b32 s0, s0, 2
 ; GFX8-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i32_zext_i16:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
 ; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff
 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_shl_i32_zext_i16:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
 ; GFX10-NEXT: s_and_b32 s0, s0, 0x3fff
 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2
 ; GFX10-NEXT: ; return to shader part epilog