Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -516,6 +516,10 @@
   bool matchBitfieldExtractFromSExtInReg(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Form a G_UBFX from a G_AND fed by a right shift.
+  bool matchBitfieldExtractFromAnd(
+      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
   bool applyBuildFn(MachineInstr &MI,
                     std::function<void(MachineIRBuilder &)> &MatchInfo);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -574,6 +574,12 @@
     [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
   (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def bitfield_extract_from_and : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_AND):$root,
+    [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
 def extend_through_phis : GICombineRule<
   (defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -605,7 +611,7 @@
   extract_all_elts_from_build_vector]>;
 
 def form_bitfield_extract : GICombineGroup<[
-  bitfield_extract_from_sext_inreg]>;
+  bitfield_extract_from_sext_inreg, bitfield_extract_from_and]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3901,6 +3901,54 @@
   return true;
 }
 
+bool CombinerHelper::matchBitfieldExtractFromAnd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  // TODO: Need legalizer support for splitting these back into shifts +
+  // extends. For now, only allow the combine post-legalization.
+  if (isBeforeLegalizer())
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(LHS);
+  if (!isLegal({TargetOpcode::G_UBFX, {Ty}}))
+    return false;
+
+  // TODO: Some targets don't need a constant. Support that.
+  //
+  // Match:
+  // and (lshr x, cst), mask
+  // And check if we can form:
+  // ubfx x, cst, width
+  int64_t AndImm;
+  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(AndImm)))
+    return false;
+
+  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+  if (AndImm & (AndImm + 1))
+    return false;
+
+  Register ShiftSrc, LSBReg;
+  int64_t LSBImm;
+  if (!mi_match(LHS, MRI,
+                m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_Reg(LSBReg)))) ||
+      !mi_match(LSBReg, MRI, m_ICst(LSBImm)))
+    return false;
+
+  // LSB must fit within the register.
+  const unsigned Size = Ty.getSizeInBits();
+  if (static_cast<uint64_t>(LSBImm) >= Size)
+    return false;
+
+  int64_t Width = LSBImm + APInt(Size, AndImm).countTrailingOnes() - 1;
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto WidthCst = B.buildConstant(Ty, Width);
+    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBReg, WidthCst});
+  };
+
+  return true;
+}
+
 bool CombinerHelper::applyBuildFn(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);
Index: llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-and.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-and.mir
@@ -0,0 +1,229 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can combine
+#
+# and (lshr x, cst), mask -> ubfx x, cst, width
+
+...
+---
+name:            ubfx_s32
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $w0
+
+    ; LSB = 5
+    ; Width = LSB + trailing_ones(255) - 1 =
+    ;         5 + 8 - 1 = 12
+
+    ; CHECK-LABEL: name: ubfx_s32
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: %and:_(s32) = G_UBFX %x, %lsb, [[C]]
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 5
+    %mask:_(s32) = G_CONSTANT i32 255
+    %shift:_(s32) = G_LSHR %x, %lsb
+    %and:_(s32) = G_AND %shift, %mask
+    $w0 = COPY %and
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            ubfx_s64
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; LSB = 5
+    ; Width = LSB + trailing_ones(1) - 1 =
+    ;         5 + 1 - 1 = 5
+
+    ; CHECK-LABEL: name: ubfx_s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %x:_(s64) = COPY $x0
+    ; CHECK: %lsb:_(s64) = G_CONSTANT i64 5
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK: %and:_(s64) = G_UBFX %x, %lsb, [[C]]
+    ; CHECK: $x0 = COPY %and(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s64) = COPY $x0
+    %lsb:_(s64) = G_CONSTANT i64 5
+    %mask:_(s64) = G_CONSTANT i64 1
+    %shift:_(s64) = G_LSHR %x, %lsb
+    %and:_(s64) = G_AND %shift, %mask
+    $x0 = COPY %and
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            dont_combine_no_and_cst
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; UBFX needs to be selected to UBFMWri/UBFMXri, so we need constants.
+
+    ; CHECK-LABEL: name: dont_combine_no_and_cst
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32)
+    ; CHECK: %and:_(s32) = G_AND %shift, %y
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %lsb:_(s32) = G_CONSTANT i32 5
+    %shift:_(s32) = G_LSHR %x, %lsb
+    %and:_(s32) = G_AND %shift, %y
+    $w0 = COPY %and
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_combine_and_cst_not_mask
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: dont_combine_and_cst_not_mask
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: %not_a_mask:_(s32) = G_CONSTANT i32 2
+    ; CHECK: %shift:_(s32) = G_LSHR %x, %lsb(s32)
+    ; CHECK: %and:_(s32) = G_AND %shift, %not_a_mask
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 5
+    %not_a_mask:_(s32) = G_CONSTANT i32 2
+    %shift:_(s32) = G_LSHR %x, %lsb
+    %and:_(s32) = G_AND %shift, %not_a_mask
+    $w0 = COPY %and
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_combine_shift_more_than_one_use
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: dont_combine_shift_more_than_one_use
+    ; CHECK: liveins: $x0
+    ; CHECK: %x:_(s64) = COPY $x0
+    ; CHECK: %lsb:_(s64) = G_CONSTANT i64 5
+    ; CHECK: %mask:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shift:_(s64) = G_LSHR %x, %lsb(s64)
+    ; CHECK: %and:_(s64) = G_AND %shift, %mask
+    ; CHECK: %sub:_(s64) = G_SUB %and, %shift
+    ; CHECK: $x0 = COPY %sub(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s64) = COPY $x0
+    %lsb:_(s64) = G_CONSTANT i64 5
+    %mask:_(s64) = G_CONSTANT i64 1
+    %shift:_(s64) = G_LSHR %x, %lsb
+    %and:_(s64) = G_AND %shift, %mask
+    %sub:_(s64) = G_SUB %and, %shift
+    $x0 = COPY %sub
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            dont_combine_negative_lsb
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $w0
+
+    ; LSB must be in [0, reg_size)
+
+    ; CHECK-LABEL: name: dont_combine_negative_lsb
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %negative:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: %mask:_(s32) = G_CONSTANT i32 255
+    ; CHECK: %shift:_(s32) = G_LSHR %x, %negative(s32)
+    ; CHECK: %and:_(s32) = G_AND %shift, %mask
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %negative:_(s32) = G_CONSTANT i32 -1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %shift:_(s32) = G_LSHR %x, %negative
+    %and:_(s32) = G_AND %shift, %mask
+    $w0 = COPY %and
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_combine_lsb_too_large
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $w0
+
+    ; LSB must be in [0, reg_size)
+
+    ; CHECK-LABEL: name: dont_combine_lsb_too_large
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %too_large:_(s32) = G_CONSTANT i32 32
+    ; CHECK: %mask:_(s32) = G_CONSTANT i32 255
+    ; CHECK: %shift:_(s32) = G_LSHR %x, %too_large(s32)
+    ; CHECK: %and:_(s32) = G_AND %shift, %mask
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %too_large:_(s32) = G_CONSTANT i32 32
+    %mask:_(s32) = G_CONSTANT i32 255
+    %shift:_(s32) = G_LSHR %x, %too_large
+    %and:_(s32) = G_AND %shift, %mask
+    $w0 = COPY %and
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_combine_vector
+tracksRegLiveness: true
+legalized:       true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: dont_combine_vector
+    ; CHECK: liveins: $d0
+    ; CHECK: %x:_(<2 x s32>) = COPY $d0
+    ; CHECK: %lsb_cst:_(s32) = G_CONSTANT i32 5
+    ; CHECK: %lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst(s32), %lsb_cst(s32)
+    ; CHECK: %mask_cst:_(s32) = G_CONSTANT i32 255
+    ; CHECK: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst(s32), %mask_cst(s32)
+    ; CHECK: %shift:_(<2 x s32>) = G_LSHR %x, %lsb(<2 x s32>)
+    ; CHECK: %and:_(<2 x s32>) = G_AND %shift, %mask
+    ; CHECK: $d0 = COPY %and(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %x:_(<2 x s32>) = COPY $d0
+    %lsb_cst:_(s32) = G_CONSTANT i32 5
+    %lsb:_(<2 x s32>) = G_BUILD_VECTOR %lsb_cst, %lsb_cst
+    %mask_cst:_(s32) = G_CONSTANT i32 255
+    %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_cst, %mask_cst
+    %shift:_(<2 x s32>) = G_LSHR %x, %lsb
+    %and:_(<2 x s32>) = G_AND %shift, %mask
+    $d0 = COPY %and
+    RET_ReallyLR implicit $d0
+...
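For illustration, a minimal before/after sketch of the rewrite this combine performs, based on the ubfx_s32 test above and assuming this patch's width convention (width = lsb + trailing_ones(mask) - 1); the %width name is only illustrative, the autogenerated checks refer to it as [[C]]:

  ; Input pattern seen by the post-legalizer combiner:
  %x:_(s32) = COPY $w0
  %lsb:_(s32) = G_CONSTANT i32 5
  %mask:_(s32) = G_CONSTANT i32 255
  %shift:_(s32) = G_LSHR %x, %lsb
  %and:_(s32) = G_AND %shift, %mask

  ; After matchBitfieldExtractFromAnd + applyBuildFn, once the now-dead
  ; %mask and %shift have been cleaned up:
  %x:_(s32) = COPY $w0
  %lsb:_(s32) = G_CONSTANT i32 5
  %width:_(s32) = G_CONSTANT i32 12   ; 5 + trailing_ones(255) - 1
  %and:_(s32) = G_UBFX %x, %lsb, %width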