diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -583,6 +583,9 @@
   /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
   bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Match: shr (and x, n), k -> ubfx x, pos, width
+  bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   // Helpers for reassociation:
   bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
                                     BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -691,9 +691,16 @@
          [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def bitfield_extract_from_shr_and : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+         [{ return Helper.matchBitfieldExtractFromShrAnd(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                             bitfield_extract_from_and,
-                                            bitfield_extract_from_shr]>;
+                                            bitfield_extract_from_shr,
+                                            bitfield_extract_from_shr_and]>;
 
 def udiv_by_const : GICombineRule<
   (defs root:$root),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4124,6 +4124,59 @@
   return true;
 }
 
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  const unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+  const Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+
+  Register AndSrc;
+  int64_t ShrAmt;
+  int64_t SMask;
+  const unsigned Size = Ty.getScalarSizeInBits();
+
+  // Try to match shr (and x, c1), c2
+  if (!mi_match(Dst, MRI,
+                m_BinOp(Opcode,
+                        m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+                        m_ICst(ShrAmt))))
+    return false;
+
+  if (ShrAmt < 0 || ShrAmt >= Size)
+    return false;
+
+  // Check that [su]bfx can do the extraction, with no holes in the mask.
+  uint64_t UMask = SMask;
+  UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+  UMask &= maskTrailingOnes<uint64_t>(Size);
+  if (!isMask_64(UMask))
+    return false;
+
+  // Calculate start position and width of the extract.
+  const int64_t Pos = ShrAmt;
+  const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+
+  // The result should not be sign extended if the mask zeroes the MSB.
+  unsigned ExtrOpcode =
+      (Opcode == TargetOpcode::G_LSHR || Width + ShrAmt != Size)
+          ? TargetOpcode::G_UBFX
+          : TargetOpcode::G_SBFX;
+
+  // Check if the type we would use for the extract is legal.
+  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto WidthCst = B.buildConstant(ExtractTy, Width);
+    auto PosCst = B.buildConstant(ExtractTy, Pos);
+    B.buildInstr(ExtrOpcode, {Dst}, {AndSrc, PosCst, WidthCst});
+  };
+  return true;
+}
+
 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
     MachineInstr &PtrAdd) {
   assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr-and.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr-and.mir
@@ -0,0 +1,152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can fold a G_ASHR/G_LSHR fed by a G_AND into a G_SBFX/G_UBFX.
+
+---
+name: mask_extract_unsigned_32
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: mask_extract_unsigned_32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 12
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: mask_extract_unsigned_64
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: mask_extract_unsigned_64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C]](s64), [[C1]]
+    ; CHECK-NEXT: $x0 = COPY [[UBFX]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_CONSTANT i64 1080863910568919040
+    %2:_(s64) = G_CONSTANT i64 56
+    %3:_(s64) = G_AND %0, %1
+    %4:_(s64) = G_LSHR %3, %2
+    $x0 = COPY %4(s64)
+...
+---
+name: mask_extract_signed
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: mask_extract_signed
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C1]]
+    ; CHECK-NEXT: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 3221225472
+    %2:_(s32) = G_CONSTANT i32 30
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: mask_extract_signed_nonneg
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: mask_extract_signed_nonneg
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C1]]
+    ; CHECK-NEXT: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 2147483647
+    %2:_(s32) = G_CONSTANT i32 29
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: no_mask_extract_large_shift
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: no_mask_extract_large_shift
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 12
+    %2:_(s32) = G_CONSTANT i32 33
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: no_mask_extract_negative_shift
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: no_mask_extract_negative_shift
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 12
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: no_mask_extract_disjoint
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: no_mask_extract_disjoint
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 26
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+---
+name: no_mask_extract_extra_bits
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: no_mask_extract_extra_bits
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 25
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
@@ -277,9 +277,8 @@
 define dso_local void @invalid_shift(i16 %x, i8* %p) {
 ; CHECK-LABEL: invalid_shift:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    ubfx w8, w0, #4, #12
 ; CHECK-NEXT:    strb w0, [x1]
-; CHECK-NEXT:    lsr w8, w8, #4
 ; CHECK-NEXT:    strb w8, [x1, #1]
 ; CHECK-NEXT:    ret
   %t1 = trunc i16 %x to i8
@@ -316,9 +315,8 @@
 define dso_local void @different_base_reg(i16 %x, i8* %p, i8 *%p2) {
 ; CHECK-LABEL: different_base_reg:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    ubfx w8, w0, #8, #8
 ; CHECK-NEXT:    strb w0, [x1]
-; CHECK-NEXT:    lsr w8, w8, #8
 ; CHECK-NEXT:    strb w8, [x2, #1]
 ; CHECK-NEXT:    ret
   %t1 = trunc i16 %x to i8
@@ -333,9 +331,8 @@
 define dso_local void @second_store_is_volatile(i16 %x, i8* %p) {
 ; CHECK-LABEL: second_store_is_volatile:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    ubfx w8, w0, #8, #8
 ; CHECK-NEXT:    strb w0, [x1]
-; CHECK-NEXT:    lsr w8, w8, #8
 ; CHECK-NEXT:    strb w8, [x1, #1]
 ; CHECK-NEXT:    ret
   %t1 = trunc i16 %x to i8