diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -72,6 +72,16 @@
   uint64_t ValSum;
 };
 
+struct MergeTruncStoresInfo {
+  bool NeedBSwap;
+  bool NeedRotate;
+  Register WideSrcVal;
+  GStore *LowestIdxStore;
+  SmallVector<GStore *> FoundStores;
+};
+
+using BuildFnTy = std::function<void(MachineIRBuilder &)>;
+
 using OperandBuildSteps =
     SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
 struct InstructionBuildSteps {
@@ -463,7 +473,7 @@
   /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
   bool matchOverlappingAnd(MachineInstr &MI,
-                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+                           BuildFnTy &MatchInfo);
 
   /// \return true if \p MI is a G_AND instruction whose operands are x and y
   /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
@@ -520,7 +530,10 @@
   /// And check if the tree can be replaced with a M-bit load + possibly a
   /// bswap.
   bool matchLoadOrCombine(MachineInstr &MI,
-                          std::function<void(MachineIRBuilder &)> &MatchInfo);
+                          BuildFnTy &MatchInfo);
+
+  bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
+  void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
 
   bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
   void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
@@ -537,12 +550,10 @@
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
   /// By default, it erases the instruction \p MI from the function.
-  void applyBuildFn(MachineInstr &MI,
-                    std::function<void(MachineIRBuilder &)> &MatchInfo);
+  void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Use a function which takes in a MachineIRBuilder to perform a combine.
   /// This variant does not erase \p MI after calling the build function.
-  void applyBuildFnNoErase(MachineInstr &MI,
-                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+  void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
@@ -557,31 +568,26 @@
   /// KnownBits information.
   bool matchICmpToLHSKnownBits(MachineInstr &MI,
-                               std::function<void(MachineIRBuilder &)> &MatchInfo);
+                               BuildFnTy &MatchInfo);
 
-  bool matchBitfieldExtractFromSExtInReg(
-      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+  bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI,
+                                         BuildFnTy &MatchInfo);
   /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
-  bool matchBitfieldExtractFromAnd(
-      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+  bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
-  bool matchBitfieldExtractFromShr(
-      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+  bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Reassociate pointer calculations with G_ADD involved, to allow better
   /// addressing mode usage.
-  bool matchReassocPtrAdd(MachineInstr &MI,
-                          std::function<void(MachineIRBuilder &)> &MatchInfo);
-
+  bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
   /// Do constant folding when opportunities are exposed after MIR building.
   bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
 
   /// \returns true if it is possible to narrow the width of a scalar binop
   /// feeding a G_AND instruction \p MI.
-  bool matchNarrowBinopFeedingAnd(
-      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+  bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -594,6 +594,14 @@
     [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
+def truncstore_merge : GICombineRule<
+  (defs root:$root, truncstore_merge_matcdata:$info),
+  (match (wip_match_opcode G_STORE):$root,
+         [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
+  (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
+
 def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
 def extend_through_phis : GICombineRule<
   (defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -733,8 +741,8 @@
     unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
-    div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract,
-    constant_fold]>;
+    truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+    form_bitfield_extract, constant_fold]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,8 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 #include <tuple>
@@ -3779,6 +3781,269 @@
   return true;
 }
 
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+                                                 MachineRegisterInfo &MRI) {
+  Register TruncVal;
+  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+    return None;
+
+  // The shift amount must be a constant multiple of the narrow type.
+  // It is translated to the offset address in the wide source value "y".
+  //
+  // x = G_LSHR y, ShiftAmtC
+  // s8 z = G_TRUNC x
+  // store z, ...
+  Register FoundSrcVal;
+  int64_t ShiftAmt;
+  if (!mi_match(TruncVal, MRI,
+                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+    if (!SrcVal.isValid() || TruncVal == SrcVal) {
+      if (!SrcVal.isValid())
+        SrcVal = TruncVal;
+      return 0; // If it's the lowest index store.
+    }
+    return None;
+  }
+
+  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+  if (ShiftAmt % NarrowBits != 0)
+    return None;
+  const unsigned Offset = ShiftAmt / NarrowBits;
+
+  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+    return None;
+
+  if (!SrcVal.isValid())
+    SrcVal = FoundSrcVal;
+  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+    return None;
+  return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 0) & 0xFF;
+///  p[1] = (val >> 8) & 0xFF;
+///  p[2] = (val >> 16) & 0xFF;
+///  p[3] = (val >> 24) & 0xFF;
+/// =>
+///  *((i32)p) = val;
+///
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 24) & 0xFF;
+///  p[1] = (val >> 16) & 0xFF;
+///  p[2] = (val >> 8) & 0xFF;
+///  p[3] = (val >> 0) & 0xFF;
+/// =>
+///  *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+                                          MergeTruncStoresInfo &MatchInfo) {
+  auto &StoreMI = cast<GStore>(MI);
+  LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+  // We only handle merging simple stores of 1-8 bytes.
+  if (!MemTy.isScalar())
+    return false;
+  switch (MemTy.getSizeInBits()) {
+  case 8:
+  case 16:
+  case 32:
+  case 64:
+    break;
+  default:
+    return false;
+  }
+  if (StoreMI.isAtomic() || StoreMI.isVolatile())
+    return false;
+
+  // We do a simple search for mergeable stores prior to this one.
+  // Any potential alias hazard along the way terminates the search.
+  SmallVector<GStore *> FoundStores;
+
+  // We're looking for:
+  // 1) a (store(trunc(...)))
+  // 2) of an LSHR/ASHR of a single wide value,
+  //    a) by the appropriate shift to get the partial value stored.
+  // 3) where the offsets form either a little or big-endian sequence.
+
+  auto &LastStore = StoreMI;
+
+  // The single base pointer that all stores must use.
+  Register BaseReg;
+  // The offset of the last store from that base pointer.
+  int64_t LastOffset;
+  if (!mi_match(LastStore.getPointerReg(), MRI,
+                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+    BaseReg = LastStore.getPointerReg();
+    LastOffset = 0;
+  }
+
+  GStore *LowestIdxStore = &LastStore;
+  int64_t LowestIdxOffset = LastOffset;
+
+  Register WideSrcVal;
+  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+  if (!LowestShiftAmt)
+    return false; // Didn't match a trunc.
+  assert(WideSrcVal.isValid());
+
+  LLT WideStoreTy = MRI.getType(WideSrcVal);
+  const unsigned NumStoresRequired =
+      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+  OffsetMap[*LowestShiftAmt] = LastOffset;
+  FoundStores.emplace_back(&LastStore);
+
+  // Search the block up for more stores.
+  // We use a search threshold of 10 instructions here because the combiner
+  // works top-down within a block, and we don't want to search an unbounded
+  // number of predecessor instructions trying to find matching stores.
+  // If we moved this optimization into a separate pass then we could probably
+  // use a more efficient search without having a hard-coded threshold.
+  const int MaxInstsToCheck = 10;
+  int NumInstsChecked = 0;
+  for (auto II = ++LastStore.getReverseIterator();
+       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+       ++II) {
+    NumInstsChecked++;
+    if (II->isCall() || II->mayLoad() || II->hasUnmodeledSideEffects())
+      break;
+    GStore *NewStore = dyn_cast_or_null<GStore>(&*II);
+    if (!NewStore || NewStore->getMMO().getMemoryType() != MemTy)
+      continue;
+
+    Register NewBaseReg;
+    int64_t MemOffset;
+    // Check we're storing to the same base + some offset.
+    if (!mi_match(NewStore->getPointerReg(), MRI,
+                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+      NewBaseReg = NewStore->getPointerReg();
+      MemOffset = 0;
+    }
+    if (BaseReg != NewBaseReg)
+      break;
+
+    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+    if (!ShiftByteOffset)
+      break;
+    if (MemOffset < LowestIdxOffset) {
+      LowestIdxOffset = MemOffset;
+      LowestIdxStore = NewStore;
+    }
+
+    // Map the offset in the store and the offset in the combined value, and
+    // early return if it has been set before.
+    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+        OffsetMap[*ShiftByteOffset] != INT64_MAX)
+      break;
+    OffsetMap[*ShiftByteOffset] = MemOffset;
+
+    FoundStores.emplace_back(NewStore);
+    // Reset counter since we've found a matching inst.
+    NumInstsChecked = 0;
+    if (FoundStores.size() == NumStoresRequired)
+      break;
+  }
+
+  if (FoundStores.size() != NumStoresRequired) {
+    return false;
+  }
+
+  const auto &DL = LastStore.getMF()->getDataLayout();
+  auto &C = LastStore.getMF()->getFunction().getContext();
+  // Check that a store of the wide type is both allowed and fast on the target.
+  bool Fast = false;
+  bool Allowed = getTargetLowering().allowsMemoryAccess(
+      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+  if (!Allowed || !Fast)
+    return false;
+
+  // Check if the pieces of the value are going to the expected places in
+  // memory to merge the stores.
+  unsigned NarrowBits = MemTy.getScalarSizeInBits();
+  auto checkOffsets = [&](bool MatchLittleEndian) {
+    if (MatchLittleEndian) {
+      for (unsigned i = 0; i != NumStoresRequired; ++i)
+        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+          return false;
+    } else { // MatchBigEndian by reversing loop counter.
+      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+           ++i, --j)
+        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+          return false;
+    }
+    return true;
+  };
+
+  // Check if the offsets line up for the native data layout of this target.
+  bool NeedBswap = false;
+  bool NeedRotate = false;
+  if (!checkOffsets(DL.isLittleEndian())) {
+    // Special-case: check if byte offsets line up for the opposite endian.
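+    // A full byte reversal can be done with a single G_BSWAP of the wide
+    // value; when only the two halves are swapped (NumStoresRequired == 2),
+    // rotating the wide value by half its width produces the same layout.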
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) + NeedBswap = true; + else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian())) + NeedRotate = true; + else + return false; + } + + if (NeedBswap && + !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}})) + return false; + if (NeedRotate && + !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}})) + return false; + + MatchInfo.NeedBSwap = NeedBswap; + MatchInfo.NeedRotate = NeedRotate; + MatchInfo.LowestIdxStore = LowestIdxStore; + MatchInfo.WideSrcVal = WideSrcVal; + MatchInfo.FoundStores = std::move(FoundStores); + return true; +} + +void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI, + MergeTruncStoresInfo &MatchInfo) { + + Builder.setInstrAndDebugLoc(MI); + Register WideSrcVal = MatchInfo.WideSrcVal; + LLT WideStoreTy = MRI.getType(WideSrcVal); + + if (MatchInfo.NeedBSwap) { + WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); + } else if (MatchInfo.NeedRotate) { + assert(WideStoreTy.getSizeInBits() % 2 == 0 && + "Unexpected type for rotate"); + auto RotAmt = + Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); + WideSrcVal = + Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); + } + + Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(), + MatchInfo.LowestIdxStore->getMMO().getPointerInfo(), + MatchInfo.LowestIdxStore->getMMO().getAlign()); + + // Erase the old stores. + for (auto *ST : MatchInfo.FoundStores) + ST->eraseFromParent(); +} + bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) { assert(MI.getOpcode() == TargetOpcode::G_PHI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll @@ -0,0 +1,331 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 | FileCheck %s + +define dso_local void @trunc_i16_to_i8(i16 %x, i8* %p) { +; CHECK-LABEL: trunc_i16_to_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i16 %x to i8 + %sh = lshr i16 %x, 8 + %t2 = trunc i16 %sh to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + store i8 %t2, i8* %p1, align 1 + ret void +} + +define dso_local void @trunc_i32_to_i8(i32 %x, i8* %p) { +; CHECK-LABEL: trunc_i32_to_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: str w0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i32 %x to i8 + %sh1 = lshr i32 %x, 8 + %t2 = trunc i32 %sh1 to i8 + %sh2 = lshr i32 %x, 16 + %t3 = trunc i32 %sh2 to i8 + %sh3 = lshr i32 %x, 24 + %t4 = trunc i32 %sh3 to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + store i8 %t2, i8* %p1, align 1 + %p2 = getelementptr inbounds i8, i8* %p, i64 2 + store i8 %t3, i8* %p2, align 1 + %p3 = getelementptr inbounds i8, i8* %p, i64 3 + store i8 %t4, i8* %p3, align 1 + ret void +} + +define dso_local void @trunc_i32_to_i16(i32 %x, i16* %p) { +; CHECK-LABEL: trunc_i32_to_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: str w0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i32 %x to i16 + %sh = lshr i32 %x, 16 + %t2 = trunc i32 %sh to i16 + store i16 %t1, i16* %p, align 2 + %p1 = getelementptr inbounds i16, i16* %p, i64 1 + store i16 %t2, i16* %p1, align 2 + ret void +} + +define dso_local void @be_i32_to_i16(i32 %x, i16* %p0) { +; 
CHECK-LABEL: be_i32_to_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ror w8, w0, #16 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t0, i16* %p1, align 2 + store i16 %t1, i16* %p0, align 2 + ret void +} + +define dso_local void @be_i32_to_i16_order(i32 %x, i16* %p0) { +; CHECK-LABEL: be_i32_to_i16_order: +; CHECK: ; %bb.0: +; CHECK-NEXT: ror w8, w0, #16 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t1, i16* %p0, align 2 + store i16 %t0, i16* %p1, align 2 + ret void +} + +define dso_local void @trunc_i64_to_i8(i64 %x, i8* %p) { +; CHECK-LABEL: trunc_i64_to_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i64 %x to i8 + %sh1 = lshr i64 %x, 8 + %t2 = trunc i64 %sh1 to i8 + %sh2 = lshr i64 %x, 16 + %t3 = trunc i64 %sh2 to i8 + %sh3 = lshr i64 %x, 24 + %t4 = trunc i64 %sh3 to i8 + %sh4 = lshr i64 %x, 32 + %t5 = trunc i64 %sh4 to i8 + %sh5 = lshr i64 %x, 40 + %t6 = trunc i64 %sh5 to i8 + %sh6 = lshr i64 %x, 48 + %t7 = trunc i64 %sh6 to i8 + %sh7 = lshr i64 %x, 56 + %t8 = trunc i64 %sh7 to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + store i8 %t2, i8* %p1, align 1 + %p2 = getelementptr inbounds i8, i8* %p, i64 2 + store i8 %t3, i8* %p2, align 1 + %p3 = getelementptr inbounds i8, i8* %p, i64 3 + store i8 %t4, i8* %p3, align 1 + %p4 = getelementptr inbounds i8, i8* %p, i64 4 + store i8 %t5, i8* %p4, align 1 + %p5 = getelementptr inbounds i8, i8* %p, i64 5 + store i8 %t6, i8* %p5, align 1 + %p6 = getelementptr inbounds i8, i8* %p, i64 6 + store i8 %t7, i8* %p6, align 1 + %p7 = getelementptr inbounds i8, i8* %p, i64 7 + store i8 %t8, i8* %p7, align 1 + ret void +} + +define dso_local void @trunc_i64_to_i16(i64 %x, i16* %p) { +; CHECK-LABEL: trunc_i64_to_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i64 %x to i16 + %sh1 = lshr i64 %x, 16 + %t2 = trunc i64 %sh1 to i16 + %sh2 = lshr i64 %x, 32 + %t3 = trunc i64 %sh2 to i16 + %sh3 = lshr i64 %x, 48 + %t4 = trunc i64 %sh3 to i16 + store i16 %t1, i16* %p, align 2 + %p1 = getelementptr inbounds i16, i16* %p, i64 1 + store i16 %t2, i16* %p1, align 2 + %p2 = getelementptr inbounds i16, i16* %p, i64 2 + store i16 %t3, i16* %p2, align 2 + %p3 = getelementptr inbounds i16, i16* %p, i64 3 + store i16 %t4, i16* %p3, align 2 + ret void +} + +define dso_local void @trunc_i64_to_i32(i64 %x, i32* %p) { +; CHECK-LABEL: trunc_i64_to_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret + %t1 = trunc i64 %x to i32 + %sh = lshr i64 %x, 32 + %t2 = trunc i64 %sh to i32 + store i32 %t1, i32* %p, align 4 + %p1 = getelementptr inbounds i32, i32* %p, i64 1 + store i32 %t2, i32* %p1, align 4 + ret void +} +define dso_local void @be_i64_to_i32(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ror x8, x0, #32 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %sh1 = lshr i64 %x, 32 + %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 1 + store i32 %t0, i32* %p1, align 4 + store i32 %t1, i32* %p0, align 4 + ret void +} + +define dso_local void @be_i64_to_i32_order(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32_order: +; CHECK: ; %bb.0: +; CHECK-NEXT: ror x8, x0, #32 +; 
CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %sh1 = lshr i64 %x, 32 + %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 1 + store i32 %t1, i32* %p0, align 4 + store i32 %t0, i32* %p1, align 4 + ret void +} + +; Negative tests. + +define void @merge_hole(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr w8, w0, #16 +; CHECK-NEXT: strb w0, [x1] +; CHECK-NEXT: strh w8, [x1, #2] +; CHECK-NEXT: ret + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +define void @merge_hole2(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr w8, w0, #16 +; CHECK-NEXT: strh w8, [x1, #2] +; CHECK-NEXT: strb w0, [x1] +; CHECK-NEXT: ret + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + ret void +} + +define void @merge_hole3(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr w8, w0, #16 +; CHECK-NEXT: strb w0, [x1, #1] +; CHECK-NEXT: strh w8, [x1, #2] +; CHECK-NEXT: ret + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p1, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +define void @merge_hole4(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr w8, w0, #16 +; CHECK-NEXT: strb w0, [x1, #2] +; CHECK-NEXT: strh w8, [x1] +; CHECK-NEXT: ret + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i8, i8* %p, i64 2 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p2, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %pcast, align 1 + ret void +} + +define dso_local i32 @load_between_stores(i32 %x, i16* %p, i32 *%ptr) { +; CHECK-LABEL: load_between_stores: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w0, [x1] +; CHECK-NEXT: ldr w8, [x2] +; CHECK-NEXT: lsr w9, w0, #16 +; CHECK-NEXT: strh w9, [x1, #2] +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %t1 = trunc i32 %x to i16 + %sh = lshr i32 %x, 16 + %t2 = trunc i32 %sh to i16 + store i16 %t1, i16* %p, align 2 + %ld = load i32, i32 *%ptr + %p1 = getelementptr inbounds i16, i16* %p, i64 1 + store i16 %t2, i16* %p1, align 2 + ret i32 %ld +} + +define dso_local void @invalid_shift(i16 %x, i8* %p) { +; CHECK-LABEL: invalid_shift: +; CHECK: ; %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: lsr w8, w8, #4 +; CHECK-NEXT: strb w0, [x1] +; CHECK-NEXT: strb w8, [x1, #1] +; CHECK-NEXT: ret + %t1 = trunc i16 %x to i8 + %sh = lshr i16 %x, 4 + %t2 = trunc i16 %sh to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + store i8 %t2, i8* %p1, align 1 + ret void +} + +define dso_local void @missing_store(i32 %x, i8* %p) { +; CHECK-LABEL: missing_store: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr w8, w0, #8 +; CHECK-NEXT: lsr w9, w0, #24 +; CHECK-NEXT: strb w0, [x1] +; CHECK-NEXT: strb w8, [x1, #1] +; CHECK-NEXT: strb w9, [x1, #3] +; CHECK-NEXT: ret + %t1 = trunc i32 %x to i8 + %sh1 = lshr i32 %x, 8 + %t2 = trunc i32 %sh1 to i8 + %sh3 = lshr i32 %x, 
24 + %t4 = trunc i32 %sh3 to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + store i8 %t2, i8* %p1, align 1 + %p3 = getelementptr inbounds i8, i8* %p, i64 3 + store i8 %t4, i8* %p3, align 1 + ret void +} + +define dso_local void @different_base_reg(i16 %x, i8* %p, i8 *%p2) { +; CHECK-LABEL: different_base_reg: +; CHECK: ; %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: lsr w8, w8, #8 +; CHECK-NEXT: strb w0, [x1] +; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: ret + %t1 = trunc i16 %x to i8 + %sh = lshr i16 %x, 8 + %t2 = trunc i16 %sh to i8 + store i8 %t1, i8* %p, align 1 + %p1 = getelementptr inbounds i8, i8* %p2, i64 1 + store i8 %t2, i8* %p1, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir @@ -0,0 +1,698 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: trunc_i16_to_i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: trunc_i16_to_i8 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s16), align 1) + ; CHECK: RET_ReallyLR + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %1:_(p0) = COPY $x1 + %4:_(s16) = G_CONSTANT i16 8 + %3:_(s8) = G_TRUNC %0(s16) + %5:_(s16) = G_LSHR %0, %4(s16) + %6:_(s8) = G_TRUNC %5(s16) + G_STORE %3(s8), %1(p0) :: (store (s8)) + %7:_(s64) = G_CONSTANT i64 1 + %8:_(p0) = G_PTR_ADD %1, %7(s64) + G_STORE %6(s8), %8(p0) :: (store (s8)) + RET_ReallyLR + +... +--- +name: trunc_i32_to_i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: trunc_i32_to_i8 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 1) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %3:_(s32) = G_CONSTANT i32 8 + %6:_(s32) = G_CONSTANT i32 16 + %9:_(s32) = G_CONSTANT i32 24 + %2:_(s8) = G_TRUNC %0(s32) + %4:_(s32) = G_LSHR %0, %3(s32) + %5:_(s8) = G_TRUNC %4(s32) + %7:_(s32) = G_LSHR %0, %6(s32) + %8:_(s8) = G_TRUNC %7(s32) + %10:_(s32) = G_LSHR %0, %9(s32) + %11:_(s8) = G_TRUNC %10(s32) + G_STORE %2(s8), %1(p0) :: (store (s8)) + %12:_(s64) = G_CONSTANT i64 1 + %13:_(p0) = G_PTR_ADD %1, %12(s64) + G_STORE %5(s8), %13(p0) :: (store (s8)) + %14:_(s64) = G_CONSTANT i64 2 + %15:_(p0) = G_PTR_ADD %1, %14(s64) + G_STORE %8(s8), %15(p0) :: (store (s8)) + %16:_(s64) = G_CONSTANT i64 3 + %17:_(p0) = G_PTR_ADD %1, %16(s64) + G_STORE %11(s8), %17(p0) :: (store (s8)) + RET_ReallyLR + +... 
+--- +name: trunc_i32_to_i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: trunc_i32_to_i16 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 2) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %3:_(s32) = G_CONSTANT i32 16 + %2:_(s16) = G_TRUNC %0(s32) + %4:_(s32) = G_LSHR %0, %3(s32) + %5:_(s16) = G_TRUNC %4(s32) + G_STORE %2(s16), %1(p0) :: (store (s16)) + %6:_(s64) = G_CONSTANT i64 2 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %5(s16), %7(p0) :: (store (s16)) + RET_ReallyLR + +... +--- +name: be_i32_to_i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: be_i32_to_i16 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s32) + ; CHECK: G_STORE [[ROTR]](s32), [[COPY1]](p0) :: (store (s32), align 2) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %2:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_LSHR %0, %2(s32) + %4:_(s16) = G_TRUNC %0(s32) + %5:_(s16) = G_TRUNC %3(s32) + %6:_(s64) = G_CONSTANT i64 2 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %4(s16), %7(p0) :: (store (s16)) + G_STORE %5(s16), %1(p0) :: (store (s16)) + RET_ReallyLR + +... +--- +name: be_i32_to_i16_order +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: be_i32_to_i16_order + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s32) + ; CHECK: G_STORE [[ROTR]](s32), [[COPY1]](p0) :: (store (s32), align 2) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %2:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_LSHR %0, %2(s32) + %4:_(s16) = G_TRUNC %0(s32) + %5:_(s16) = G_TRUNC %3(s32) + %6:_(s64) = G_CONSTANT i64 2 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %5(s16), %1(p0) :: (store (s16)) + G_STORE %4(s16), %7(p0) :: (store (s16)) + RET_ReallyLR + +... 
+--- +name: trunc_i64_to_i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: trunc_i64_to_i8 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 1) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(p0) = COPY $x1 + %3:_(s64) = G_CONSTANT i64 8 + %6:_(s64) = G_CONSTANT i64 16 + %9:_(s64) = G_CONSTANT i64 24 + %12:_(s64) = G_CONSTANT i64 32 + %15:_(s64) = G_CONSTANT i64 40 + %18:_(s64) = G_CONSTANT i64 48 + %21:_(s64) = G_CONSTANT i64 56 + %2:_(s8) = G_TRUNC %0(s64) + %4:_(s64) = G_LSHR %0, %3(s64) + %5:_(s8) = G_TRUNC %4(s64) + %7:_(s64) = G_LSHR %0, %6(s64) + %8:_(s8) = G_TRUNC %7(s64) + %10:_(s64) = G_LSHR %0, %9(s64) + %11:_(s8) = G_TRUNC %10(s64) + %13:_(s64) = G_LSHR %0, %12(s64) + %14:_(s8) = G_TRUNC %13(s64) + %16:_(s64) = G_LSHR %0, %15(s64) + %17:_(s8) = G_TRUNC %16(s64) + %19:_(s64) = G_LSHR %0, %18(s64) + %20:_(s8) = G_TRUNC %19(s64) + %22:_(s64) = G_LSHR %0, %21(s64) + %23:_(s8) = G_TRUNC %22(s64) + G_STORE %2(s8), %1(p0) :: (store (s8)) + %24:_(s64) = G_CONSTANT i64 1 + %25:_(p0) = G_PTR_ADD %1, %24(s64) + G_STORE %5(s8), %25(p0) :: (store (s8)) + %26:_(s64) = G_CONSTANT i64 2 + %27:_(p0) = G_PTR_ADD %1, %26(s64) + G_STORE %8(s8), %27(p0) :: (store (s8)) + %28:_(s64) = G_CONSTANT i64 3 + %29:_(p0) = G_PTR_ADD %1, %28(s64) + G_STORE %11(s8), %29(p0) :: (store (s8)) + %30:_(s64) = G_CONSTANT i64 4 + %31:_(p0) = G_PTR_ADD %1, %30(s64) + G_STORE %14(s8), %31(p0) :: (store (s8)) + %32:_(s64) = G_CONSTANT i64 5 + %33:_(p0) = G_PTR_ADD %1, %32(s64) + G_STORE %17(s8), %33(p0) :: (store (s8)) + %34:_(s64) = G_CONSTANT i64 6 + %35:_(p0) = G_PTR_ADD %1, %34(s64) + G_STORE %20(s8), %35(p0) :: (store (s8)) + %36:_(s64) = G_CONSTANT i64 7 + %37:_(p0) = G_PTR_ADD %1, %36(s64) + G_STORE %23(s8), %37(p0) :: (store (s8)) + RET_ReallyLR + +... +--- +name: trunc_i64_to_i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: trunc_i64_to_i16 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 2) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(p0) = COPY $x1 + %3:_(s64) = G_CONSTANT i64 16 + %6:_(s64) = G_CONSTANT i64 32 + %9:_(s64) = G_CONSTANT i64 48 + %2:_(s16) = G_TRUNC %0(s64) + %4:_(s64) = G_LSHR %0, %3(s64) + %5:_(s16) = G_TRUNC %4(s64) + %7:_(s64) = G_LSHR %0, %6(s64) + %8:_(s16) = G_TRUNC %7(s64) + %10:_(s64) = G_LSHR %0, %9(s64) + %11:_(s16) = G_TRUNC %10(s64) + G_STORE %2(s16), %1(p0) :: (store (s16)) + %12:_(s64) = G_CONSTANT i64 2 + %13:_(p0) = G_PTR_ADD %1, %12(s64) + G_STORE %5(s16), %13(p0) :: (store (s16)) + %14:_(s64) = G_CONSTANT i64 4 + %15:_(p0) = G_PTR_ADD %1, %14(s64) + G_STORE %8(s16), %15(p0) :: (store (s16)) + %16:_(s64) = G_CONSTANT i64 6 + %17:_(p0) = G_PTR_ADD %1, %16(s64) + G_STORE %11(s16), %17(p0) :: (store (s16)) + RET_ReallyLR + +... 
+--- +name: trunc_i64_to_i32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: trunc_i64_to_i32 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 4) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(p0) = COPY $x1 + %3:_(s64) = G_CONSTANT i64 32 + %2:_(s32) = G_TRUNC %0(s64) + %4:_(s64) = G_LSHR %0, %3(s64) + %5:_(s32) = G_TRUNC %4(s64) + G_STORE %2(s32), %1(p0) :: (store (s32)) + %6:_(s64) = G_CONSTANT i64 4 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %5(s32), %7(p0) :: (store (s32)) + RET_ReallyLR + +... +--- +name: be_i64_to_i32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: be_i64_to_i32 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[ROTR:%[0-9]+]]:_(s64) = G_ROTR [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[ROTR]](s64), [[COPY1]](p0) :: (store (s64), align 4) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 32 + %3:_(s64) = G_LSHR %0, %2(s64) + %4:_(s32) = G_TRUNC %0(s64) + %5:_(s32) = G_TRUNC %3(s64) + %6:_(s64) = G_CONSTANT i64 4 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %4(s32), %7(p0) :: (store (s32)) + G_STORE %5(s32), %1(p0) :: (store (s32)) + RET_ReallyLR + +... +--- +name: be_i64_to_i32_order +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: be_i64_to_i32_order + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[ROTR:%[0-9]+]]:_(s64) = G_ROTR [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[ROTR]](s64), [[COPY1]](p0) :: (store (s64), align 4) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 32 + %3:_(s64) = G_LSHR %0, %2(s64) + %4:_(s32) = G_TRUNC %0(s64) + %5:_(s32) = G_TRUNC %3(s64) + %6:_(s64) = G_CONSTANT i64 4 + %7:_(p0) = G_PTR_ADD %1, %6(s64) + G_STORE %5(s32), %1(p0) :: (store (s32)) + G_STORE %4(s32), %7(p0) :: (store (s32)) + RET_ReallyLR + +... 
+--- +name: merge_hole +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: merge_hole + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8)) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16), align 1) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %5:_(s32) = G_CONSTANT i32 16 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(p0) = G_PTR_ADD %1, %2(s64) + %4:_(s8) = G_TRUNC %0(s32) + G_STORE %4(s8), %1(p0) :: (store (s8)) + %6:_(s32) = G_LSHR %0, %5(s32) + %7:_(s16) = G_TRUNC %6(s32) + G_STORE %7(s16), %3(p0) :: (store (s16), align 1) + RET_ReallyLR + +... +--- +name: merge_hole2 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: merge_hole2 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD]](p0) :: (store (s16), align 1) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8)) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %4:_(s32) = G_CONSTANT i32 16 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(p0) = G_PTR_ADD %1, %2(s64) + %5:_(s32) = G_LSHR %0, %4(s32) + %6:_(s16) = G_TRUNC %5(s32) + G_STORE %6(s16), %3(p0) :: (store (s16), align 1) + %7:_(s8) = G_TRUNC %0(s32) + G_STORE %7(s8), %1(p0) :: (store (s8)) + RET_ReallyLR + +... 
+--- +name: merge_hole3 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: merge_hole3 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8)) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD1]](p0) :: (store (s16), align 1) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %7:_(s32) = G_CONSTANT i32 16 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(p0) = G_PTR_ADD %1, %2(s64) + %4:_(s64) = G_CONSTANT i64 2 + %5:_(p0) = G_PTR_ADD %1, %4(s64) + %6:_(s8) = G_TRUNC %0(s32) + G_STORE %6(s8), %3(p0) :: (store (s8)) + %8:_(s32) = G_LSHR %0, %7(s32) + %9:_(s16) = G_TRUNC %8(s32) + G_STORE %9(s16), %5(p0) :: (store (s16), align 1) + RET_ReallyLR + +... +--- +name: merge_hole4 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: merge_hole4 + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8)) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: G_STORE [[TRUNC1]](s16), [[COPY1]](p0) :: (store (s16), align 1) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %5:_(s32) = G_CONSTANT i32 16 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(p0) = G_PTR_ADD %1, %2(s64) + %4:_(s8) = G_TRUNC %0(s32) + G_STORE %4(s8), %3(p0) :: (store (s8)) + %6:_(s32) = G_LSHR %0, %5(s32) + %7:_(s16) = G_TRUNC %6(s32) + G_STORE %7(s16), %1(p0) :: (store (s16), align 1) + RET_ReallyLR + +... 
+--- +name: load_between_stores +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w0, $x1, $x2 + + ; CHECK-LABEL: name: load_between_stores + ; CHECK: liveins: $w0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: G_STORE [[TRUNC]](s16), [[COPY1]](p0) :: (store (s16)) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32)) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %4:_(s32) = G_CONSTANT i32 16 + %3:_(s16) = G_TRUNC %0(s32) + %5:_(s32) = G_LSHR %0, %4(s32) + %6:_(s16) = G_TRUNC %5(s32) + G_STORE %3(s16), %1(p0) :: (store (s16)) + %7:_(s32) = G_LOAD %2(p0) :: (load (s32)) + %8:_(s64) = G_CONSTANT i64 2 + %9:_(p0) = G_PTR_ADD %1, %8(s64) + G_STORE %6(s16), %9(p0) :: (store (s16)) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: invalid_shift +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: invalid_shift + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8)) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD]](p0) :: (store (s8)) + ; CHECK: RET_ReallyLR + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %1:_(p0) = COPY $x1 + %4:_(s16) = G_CONSTANT i16 4 + %3:_(s8) = G_TRUNC %0(s16) + %5:_(s16) = G_LSHR %0, %4(s16) + %6:_(s8) = G_TRUNC %5(s16) + G_STORE %3(s8), %1(p0) :: (store (s8)) + %7:_(s64) = G_CONSTANT i64 1 + %8:_(p0) = G_PTR_ADD %1, %7(s64) + G_STORE %6(s8), %8(p0) :: (store (s8)) + RET_ReallyLR + +... 
+--- +name: missing_store +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $w0, $x1 + + ; CHECK-LABEL: name: missing_store + ; CHECK: liveins: $w0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8)) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8)) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD1]](p0) :: (store (s8)) + ; CHECK: RET_ReallyLR + %0:_(s32) = COPY $w0 + %1:_(p0) = COPY $x1 + %3:_(s32) = G_CONSTANT i32 8 + %6:_(s32) = G_CONSTANT i32 24 + %2:_(s8) = G_TRUNC %0(s32) + %4:_(s32) = G_LSHR %0, %3(s32) + %5:_(s8) = G_TRUNC %4(s32) + %7:_(s32) = G_LSHR %0, %6(s32) + %8:_(s8) = G_TRUNC %7(s32) + G_STORE %2(s8), %1(p0) :: (store (s8)) + %9:_(s64) = G_CONSTANT i64 1 + %10:_(p0) = G_PTR_ADD %1, %9(s64) + G_STORE %5(s8), %10(p0) :: (store (s8)) + %11:_(s64) = G_CONSTANT i64 3 + %12:_(p0) = G_PTR_ADD %1, %11(s64) + G_STORE %8(s8), %12(p0) :: (store (s8)) + RET_ReallyLR + +... +--- +name: different_base_reg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w0, $x1, $x2 + + ; CHECK-LABEL: name: different_base_reg + ; CHECK: liveins: $w0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY1]](p0) :: (store (s8)) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64) + ; CHECK: G_STORE [[TRUNC2]](s8), [[PTR_ADD]](p0) :: (store (s8)) + ; CHECK: RET_ReallyLR + %3:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %3(s32) + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %5:_(s16) = G_CONSTANT i16 8 + %4:_(s8) = G_TRUNC %0(s16) + %6:_(s16) = G_LSHR %0, %5(s16) + %7:_(s8) = G_TRUNC %6(s16) + G_STORE %4(s8), %1(p0) :: (store (s8)) + %8:_(s64) = G_CONSTANT i64 1 + %9:_(p0) = G_PTR_ADD %2, %8(s64) + G_STORE %7(s8), %9(p0) :: (store (s8)) + RET_ReallyLR + +...