diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -517,6 +517,10 @@
   /// or false constant based off of KnownBits information.
   bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
 
+  /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
+  bool matchBitfieldExtractFromAnd(
+      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1793,6 +1793,12 @@
   /// Should be used only when getIRStackGuard returns nullptr.
   virtual Function *getSSPStackGuardCheck(const Module &M) const;
 
+  /// \returns true if a constant G_UBFX is legal on the target.
+  virtual bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
+                                                     LLT Ty2) const {
+    return false;
+  }
+
 protected:
   Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
                                             bool UseTLS) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -620,6 +620,12 @@
          [{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
 
+def bitfield_extract_from_and : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_AND):$root,
+    [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
@@ -664,7 +670,7 @@
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
-    div_rem_to_divrem, funnel_shift_combines]>;
+    div_rem_to_divrem, funnel_shift_combines, bitfield_extract_from_and]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3986,6 +3986,41 @@
   return true;
 }
 
+bool CombinerHelper::matchBitfieldExtractFromAnd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+          TargetOpcode::G_UBFX, Ty, Ty))
+    return false;
+
+  int64_t AndImm, LSBImm;
+  Register ShiftSrc;
+  const unsigned Size = Ty.getScalarSizeInBits();
+  if (!mi_match(MI.getOperand(0).getReg(), MRI,
+                m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
+                       m_ICst(AndImm))))
+    return false;
+
+  // The mask is a mask of the low bits iff imm & (imm+1) == 0.
+  auto MaybeMask = static_cast<uint64_t>(AndImm);
+  if (MaybeMask & (MaybeMask + 1))
+    return false;
+
+  // LSB must fit within the register.
+  if (static_cast<uint64_t>(LSBImm) >= Size)
+    return false;
+
+  uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto WidthCst = B.buildConstant(Ty, Width);
+    auto LSBCst = B.buildConstant(Ty, LSBImm);
+    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
+  };
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -174,7 +174,8 @@
   [{ return matchBitfieldExtractFromSExtInReg(*${root}, MRI, ${info}); }]),
   (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
 
-def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg]>;
+def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
+                                            bitfield_extract_from_and]>;
 
 def lower_vector_fcmp : GICombineRule<
   (defs root:$root),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1100,6 +1100,9 @@
   // to transition between unpacked and packed types of the same element type,
   // with BITCAST used otherwise.
   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
+
+  bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
+                                             LLT Ty2) const override;
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18179,3 +18179,8 @@
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
 }
+
+bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
+    unsigned Opc, LLT Ty1, LLT Ty2) const {
+  return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-and.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-and.mir
@@ -0,0 +1,282 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner --aarch64postlegalizercombinerhelper-only-enable-rule="bitfield_extract_from_and" -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+
+# Check that we can combine
+#
+# and (lshr x, cst), mask -> ubfx x, cst, width
+
+...
+--- +name: ubfx_s32 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $w0 + + ; LSB = 5 + ; Width = LSB + trailing_ones(255) - 1 = + ; 5 + 8 - 1 = 12 + + ; CHECK-LABEL: name: ubfx_s32 + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: %and:_(s32) = G_UBFX %x, %lsb(s32), [[C]] + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %lsb:_(s32) = G_CONSTANT i32 5 + %mask:_(s32) = G_CONSTANT i32 255 + %shift:_(s32) = G_LSHR %x, %lsb + %and:_(s32) = G_AND %shift, %mask + $w0 = COPY %and + RET_ReallyLR implicit $w0 + +... +--- +name: ubfx_s64 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $x0 + + ; LSB = 5 + ; Width = LSB + trailing_ones(1) - 1 = + ; 5 + 1 - 1 = 5 + + ; CHECK-LABEL: name: ubfx_s64 + ; CHECK: liveins: $x0 + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK: %lsb:_(s64) = G_CONSTANT i64 5 + ; CHECK: %mask:_(s64) = G_CONSTANT i64 1 + ; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), %mask + ; CHECK: $x0 = COPY %and(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s64) = COPY $x0 + %lsb:_(s64) = G_CONSTANT i64 5 + %mask:_(s64) = G_CONSTANT i64 1 + %shift:_(s64) = G_LSHR %x, %lsb + %and:_(s64) = G_AND %shift, %mask + $x0 = COPY %and + RET_ReallyLR implicit $x0 + +... +--- +name: dont_combine_no_and_cst +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $w0, $w1 + + ; UBFX needs to be selected to UBFMWri/UBFMXri, so we need constants. + + ; CHECK-LABEL: name: dont_combine_no_and_cst + ; CHECK: liveins: $w0, $w1 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %y:_(s32) = COPY $w1 + ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5 + ; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32) + ; CHECK: %and:_(s32) = G_AND %shift, %y + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %lsb:_(s32) = G_CONSTANT i32 5 + %shift:_(s32) = G_LSHR %x, %lsb + %and:_(s32) = G_AND %shift, %y + $w0 = COPY %and + RET_ReallyLR implicit $w0 + +... +--- +name: dont_combine_and_cst_not_mask +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: dont_combine_and_cst_not_mask + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5 + ; CHECK: %not_a_mask:_(s32) = G_CONSTANT i32 2 + ; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32) + ; CHECK: %and:_(s32) = G_AND %shift, %not_a_mask + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %lsb:_(s32) = G_CONSTANT i32 5 + %not_a_mask:_(s32) = G_CONSTANT i32 2 + %shift:_(s32) = G_LSHR %x, %lsb + %and:_(s32) = G_AND %shift, %not_a_mask + $w0 = COPY %and + RET_ReallyLR implicit $w0 + +... 
+--- +name: dont_combine_shift_more_than_one_use +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: dont_combine_shift_more_than_one_use + ; CHECK: liveins: $x0 + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK: %lsb:_(s64) = G_CONSTANT i64 5 + ; CHECK: %mask:_(s64) = G_CONSTANT i64 1 + ; CHECK: %shift:_(s64) = G_LSHR %x, %lsb(s64) + ; CHECK: %and:_(s64) = G_AND %shift, %mask + ; CHECK: %sub:_(s64) = G_SUB %and, %shift + ; CHECK: $x0 = COPY %sub(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s64) = COPY $x0 + %lsb:_(s64) = G_CONSTANT i64 5 + %mask:_(s64) = G_CONSTANT i64 1 + %shift:_(s64) = G_LSHR %x, %lsb + %and:_(s64) = G_AND %shift, %mask + %sub:_(s64) = G_SUB %and, %shift + $x0 = COPY %sub + RET_ReallyLR implicit $x0 + +... +--- +name: dont_combine_negative_lsb +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $w0 + + ; LSB must be in [0, reg_size) + + ; CHECK-LABEL: name: dont_combine_negative_lsb + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %negative:_(s32) = G_CONSTANT i32 -1 + ; CHECK: %mask:_(s32) = G_CONSTANT i32 255 + ; CHECK: %shift:_(s32) = G_LSHR %x, %negative(s32) + ; CHECK: %and:_(s32) = G_AND %shift, %mask + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %negative:_(s32) = G_CONSTANT i32 -1 + %mask:_(s32) = G_CONSTANT i32 255 + %shift:_(s32) = G_LSHR %x, %negative + %and:_(s32) = G_AND %shift, %mask + $w0 = COPY %and + RET_ReallyLR implicit $w0 + +... +--- +name: dont_combine_lsb_too_large +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $w0 + + ; LSB must be in [0, reg_size) + + ; CHECK-LABEL: name: dont_combine_lsb_too_large + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s32) = COPY $w0 + ; CHECK: %too_large:_(s32) = G_CONSTANT i32 32 + ; CHECK: %mask:_(s32) = G_CONSTANT i32 255 + ; CHECK: %shift:_(s32) = G_LSHR %x, %too_large(s32) + ; CHECK: %and:_(s32) = G_AND %shift, %mask + ; CHECK: $w0 = COPY %and(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %too_large:_(s32) = G_CONSTANT i32 32 + %mask:_(s32) = G_CONSTANT i32 255 + %shift:_(s32) = G_LSHR %x, %too_large + %and:_(s32) = G_AND %shift, %mask + $w0 = COPY %and + RET_ReallyLR implicit $w0 + +... +--- +name: dont_combine_vector +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $d0 + ; CHECK-LABEL: name: dont_combine_vector + ; CHECK: liveins: $d0 + ; CHECK: %x:_(<2 x s32>) = COPY $d0 + ; CHECK: %lsb_cst:_(s32) = G_CONSTANT i32 5 + ; CHECK: %lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst(s32), %lsb_cst(s32) + ; CHECK: %mask_cst:_(s32) = G_CONSTANT i32 255 + ; CHECK: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst(s32), %mask_cst(s32) + ; CHECK: %shift:_(<2 x s32>) = G_LSHR %x, %lsb(<2 x s32>) + ; CHECK: %and:_(<2 x s32>) = G_AND %shift, %mask + ; CHECK: $d0 = COPY %and(<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %x:_(<2 x s32>) = COPY $d0 + %lsb_cst:_(s32) = G_CONSTANT i32 5 + %lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst, %lsb_cst + %mask_cst:_(s32) = G_CONSTANT i32 255 + %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst, %mask_cst + %shift:_(<2 x s32>) = G_LSHR %x, %lsb + %and:_(<2 x s32>) = G_AND %shift, %mask + $d0 = COPY %and + RET_ReallyLR implicit $d0 + +... +--- +name: max_signed_int_mask +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $x0 + ; mask = 0111 1111 1111 ... 1111 + ; mask + 1 = 1000 0000 0000 ... 
0000 + ; CHECK-LABEL: name: max_signed_int_mask + ; CHECK: liveins: $x0 + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK: %lsb:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), [[C]] + ; CHECK: $x0 = COPY %and(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s64) = COPY $x0 + %lsb:_(s64) = G_CONSTANT i64 0 + %mask:_(s64) = G_CONSTANT i64 9223372036854775807 + %shift:_(s64) = G_LSHR %x, %lsb + %and:_(s64) = G_AND %shift, %mask + $x0 = COPY %and + RET_ReallyLR implicit $x0 + +... +--- +name: max_unsigned_int_mask +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $x0 + ; mask = 1111 1111 1111 ... 1111 + ; mask + 1 = 0000 0000 0000 ... 000 + ; CHECK-LABEL: name: max_unsigned_int_mask + ; CHECK: liveins: $x0 + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK: %lsb:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: %and:_(s64) = G_UBFX %x, %lsb(s64), [[C]] + ; CHECK: $x0 = COPY %and(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s64) = COPY $x0 + %lsb:_(s64) = G_CONSTANT i64 5 + %mask:_(s64) = G_CONSTANT i64 18446744073709551615 + %shift:_(s64) = G_LSHR %x, %lsb + %and:_(s64) = G_AND %shift, %mask + $x0 = COPY %and + RET_ReallyLR implicit $x0
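
For reference, here is a minimal standalone sketch (plain C++20, not part of the patch; the helper name matchUbfxImms and the UbfxImms struct are invented for illustration) of the immediate checks matchBitfieldExtractFromAnd performs before building the G_UBFX: the AND constant must be a low-bits mask (imm & (imm + 1) == 0), the shift amount must be a valid bit index for the register, and the width is the number of trailing ones in the mask.

// Standalone illustration of: and (lshr x, lsb), mask -> ubfx x, lsb, width.
// Mirrors the immediate checks in matchBitfieldExtractFromAnd; not LLVM code.
#include <bit>
#include <cstdint>
#include <cstdio>
#include <optional>

struct UbfxImms {
  uint64_t Lsb;
  uint64_t Width;
};

// Returns the G_UBFX immediates if the combine would fire, nullopt otherwise.
std::optional<UbfxImms> matchUbfxImms(int64_t AndImm, int64_t LsbImm,
                                      unsigned RegSize) {
  uint64_t Mask = static_cast<uint64_t>(AndImm);
  // The AND constant is a mask of the low bits iff mask & (mask + 1) == 0
  // (e.g. 0xff or 0x7fffffffffffffff, but not 2).
  if (Mask & (Mask + 1))
    return std::nullopt;
  // The shift amount must index a bit inside the register.
  if (static_cast<uint64_t>(LsbImm) >= RegSize)
    return std::nullopt;
  // Width = number of trailing one bits in the mask.
  uint64_t Width = static_cast<uint64_t>(std::countr_one(Mask));
  return UbfxImms{static_cast<uint64_t>(LsbImm), Width};
}

int main() {
  // Mirrors ubfx_s32: lsb = 5, mask = 255 -> G_UBFX lsb 5, width 8.
  if (auto R = matchUbfxImms(255, 5, 32))
    std::printf("ubfx lsb=%llu width=%llu\n",
                (unsigned long long)R->Lsb, (unsigned long long)R->Width);
  // Mirrors dont_combine_and_cst_not_mask: 2 & 3 != 0, so no combine.
  if (!matchUbfxImms(2, 5, 32))
    std::printf("mask 2 rejected\n");
  return 0;
}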
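
A note on the "Width = LSB + trailing_ones(mask) - 1" comments in the tests: the value they compute (12 and 5 in the first two tests) matches the imms field of the UBFM instruction that the G_UBFX is later selected to (the tests point out that UBFX must become UBFMWri/UBFMXri), while the third G_UBFX operand checked in the output is the width itself (8 and 1). Below is a small sketch of that relationship, assuming the standard AArch64 alias UBFX Rd, Rn, #lsb, #width == UBFM Rd, Rn, #lsb, #(lsb + width - 1).

// Sketch relating the G_UBFX (lsb, width) operands to the UBFM encoding,
// assuming the standard alias UBFX Rd, Rn, #lsb, #width ==
// UBFM Rd, Rn, #immr, #imms with immr = lsb and imms = lsb + width - 1.
#include <cstdint>
#include <cstdio>

int main() {
  // ubfx_s32 test: mask = 255 -> width = 8, lsb = 5.
  uint64_t Lsb = 5, Width = 8;
  uint64_t Immr = Lsb;             // UBFM immr field
  uint64_t Imms = Lsb + Width - 1; // UBFM imms field = 12, as in the comment
  std::printf("G_UBFX lsb=%llu width=%llu -> UBFMWri immr=%llu imms=%llu\n",
              (unsigned long long)Lsb, (unsigned long long)Width,
              (unsigned long long)Immr, (unsigned long long)Imms);
  return 0;
}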