diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -79,14 +79,6 @@
 using BuildFnTy = std::function<void(MachineIRBuilder &)>;
 
-struct MergeTruncStoresInfo {
-  SmallVector<GStore *> FoundStores;
-  GStore *LowestIdxStore = nullptr;
-  Register WideSrcVal;
-  bool NeedBSwap = false;
-  bool NeedRotate = false;
-};
-
 using OperandBuildSteps = SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
 struct InstructionBuildSteps {
@@ -577,9 +569,6 @@
   /// bswap.
   bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
 
-  bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
-  void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
-
   bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
   void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -15,6 +15,7 @@
 #define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -131,6 +132,10 @@
   bool mergeBlockStores(MachineBasicBlock &MBB);
   bool mergeFunctionStores(MachineFunction &MF);
 
+  bool mergeTruncStore(GStore &StoreMI,
+                       SmallPtrSetImpl<GStore *> &DeletedStores);
+  bool mergeTruncStoresBlock(MachineBasicBlock &MBB);
+
   /// Initialize some target-specific data structures for the store merging
   /// optimization. \p AddrSpace indicates which address space to use when
   /// probing the legalizer info for legal stores.
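[Illustrative note, not part of the patch: the core decision the moved optimization makes is whether the byte pieces of the wide value land at consecutive memory offsets in either little- or big-endian order. Below is a minimal standalone C++ sketch of that check; the names MergeKind and classifyOffsets are hypothetical and do not exist in LLVM.]

#include <cstdint>
#include <vector>

enum class MergeKind { None, Direct, Swapped };

// OffsetMap[B] holds the memory offset at which piece B of the wide value is
// stored; a merge is possible when the offsets form a contiguous run starting
// at LowestOffset, in either native or byte-reversed order.
MergeKind classifyOffsets(const std::vector<int64_t> &OffsetMap,
                          int64_t LowestOffset, unsigned NarrowBytes,
                          bool TargetIsLittleEndian) {
  auto Matches = [&](bool LittleEndian) {
    const unsigned N = OffsetMap.size();
    for (unsigned I = 0; I != N; ++I) {
      // In little-endian order piece I lands at slot I; in big-endian order
      // the pieces are laid out back to front.
      unsigned Piece = LittleEndian ? I : N - 1 - I;
      if (OffsetMap[Piece] != LowestOffset + I * NarrowBytes)
        return false;
    }
    return true;
  };
  if (Matches(TargetIsLittleEndian))
    return MergeKind::Direct;  // A plain wide store suffices.
  if (Matches(!TargetIsLittleEndian))
    return MergeKind::Swapped; // Needs G_BSWAP (or G_ROTR for two pieces).
  return MergeKind::None;
}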
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -715,14 +715,6 @@
     [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
-
-def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
-def truncstore_merge : GICombineRule<
-  (defs root:$root, truncstore_merge_matcdata:$info),
-  (match (wip_match_opcode G_STORE):$root,
-    [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
-  (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
-
 def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
 def extend_through_phis : GICombineRule<
   (defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -1105,7 +1097,7 @@
     unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shift,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
-    truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+    div_rem_to_divrem, funnel_shift_combines,
     form_bitfield_extract, constant_fold, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3625,275 +3625,6 @@
   return true;
 }
 
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
-                        MachineRegisterInfo &MRI) {
-  Register TruncVal;
-  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
-    return std::nullopt;
-
-  // The shift amount must be a constant multiple of the narrow type.
-  // It is translated to the offset address in the wide source value "y".
-  //
-  // x = G_LSHR y, ShiftAmtC
-  // s8 z = G_TRUNC x
-  // store z, ...
-  Register FoundSrcVal;
-  int64_t ShiftAmt;
-  if (!mi_match(TruncVal, MRI,
-                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
-                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
-    if (!SrcVal.isValid() || TruncVal == SrcVal) {
-      if (!SrcVal.isValid())
-        SrcVal = TruncVal;
-      return 0; // If it's the lowest index store.
-    }
-    return std::nullopt;
-  }
-
-  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
-  if (ShiftAmt % NarrowBits != 0)
-    return std::nullopt;
-  const unsigned Offset = ShiftAmt / NarrowBits;
-
-  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
-    return std::nullopt;
-
-  if (!SrcVal.isValid())
-    SrcVal = FoundSrcVal;
-  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
-    return std::nullopt;
-  return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the target
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-  auto &StoreMI = cast<GStore>(MI);
-  LLT MemTy = StoreMI.getMMO().getMemoryType();
-
-  // We only handle merging simple stores of 1-4 bytes.
-  if (!MemTy.isScalar())
-    return false;
-  switch (MemTy.getSizeInBits()) {
-  case 8:
-  case 16:
-  case 32:
-    break;
-  default:
-    return false;
-  }
-  if (!StoreMI.isSimple())
-    return false;
-
-  // We do a simple search for mergeable stores prior to this one.
-  // Any potential alias hazard along the way terminates the search.
-  SmallVector<GStore *> FoundStores;
-
-  // We're looking for:
-  // 1) a (store(trunc(...)))
-  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
-  //    the partial value stored.
-  // 3) where the offsets form either a little or big-endian sequence.
-
-  auto &LastStore = StoreMI;
-
-  // The single base pointer that all stores must use.
-  Register BaseReg;
-  int64_t LastOffset;
-  if (!mi_match(LastStore.getPointerReg(), MRI,
-                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
-    BaseReg = LastStore.getPointerReg();
-    LastOffset = 0;
-  }
-
-  GStore *LowestIdxStore = &LastStore;
-  int64_t LowestIdxOffset = LastOffset;
-
-  Register WideSrcVal;
-  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
-  if (!LowestShiftAmt)
-    return false; // Didn't match a trunc.
-  assert(WideSrcVal.isValid());
-
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
-  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
-    return false;
-  const unsigned NumStoresRequired =
-      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
-  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
-  OffsetMap[*LowestShiftAmt] = LastOffset;
-  FoundStores.emplace_back(&LastStore);
-
-  // Search the block up for more stores.
-  // We use a search threshold of 10 instructions here because the combiner
-  // works top-down within a block, and we don't want to search an unbounded
-  // number of predecessor instructions trying to find matching stores.
-  // If we moved this optimization into a separate pass then we could probably
-  // use a more efficient search without having a hard-coded threshold.
-  const int MaxInstsToCheck = 10;
-  int NumInstsChecked = 0;
-  for (auto II = ++LastStore.getReverseIterator();
-       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
-       ++II) {
-    NumInstsChecked++;
-    GStore *NewStore;
-    if ((NewStore = dyn_cast<GStore>(&*II))) {
-      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
-        break;
-    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
-      break;
-    } else {
-      continue; // This is a safe instruction we can look past.
-    }
-
-    Register NewBaseReg;
-    int64_t MemOffset;
-    // Check we're storing to the same base + some offset.
-    if (!mi_match(NewStore->getPointerReg(), MRI,
-                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
-      NewBaseReg = NewStore->getPointerReg();
-      MemOffset = 0;
-    }
-    if (BaseReg != NewBaseReg)
-      break;
-
-    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
-    if (!ShiftByteOffset)
-      break;
-    if (MemOffset < LowestIdxOffset) {
-      LowestIdxOffset = MemOffset;
-      LowestIdxStore = NewStore;
-    }
-
-    // Map the offset in the store and the offset in the combined value, and
-    // early return if it has been set before.
-    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
-        OffsetMap[*ShiftByteOffset] != INT64_MAX)
-      break;
-    OffsetMap[*ShiftByteOffset] = MemOffset;
-
-    FoundStores.emplace_back(NewStore);
-    // Reset counter since we've found a matching inst.
-    NumInstsChecked = 0;
-    if (FoundStores.size() == NumStoresRequired)
-      break;
-  }
-
-  if (FoundStores.size() != NumStoresRequired) {
-    return false;
-  }
-
-  const auto &DL = LastStore.getMF()->getDataLayout();
-  auto &C = LastStore.getMF()->getFunction().getContext();
-  // Check that a store of the wide type is both allowed and fast on the target
-  unsigned Fast = 0;
-  bool Allowed = getTargetLowering().allowsMemoryAccess(
-      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
-  if (!Allowed || !Fast)
-    return false;
-
-  // Check if the pieces of the value are going to the expected places in memory
-  // to merge the stores.
-  unsigned NarrowBits = MemTy.getScalarSizeInBits();
-  auto checkOffsets = [&](bool MatchLittleEndian) {
-    if (MatchLittleEndian) {
-      for (unsigned i = 0; i != NumStoresRequired; ++i)
-        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    } else { // MatchBigEndian by reversing loop counter.
-      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
-           ++i, --j)
-        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
-          return false;
-    }
-    return true;
-  };
-
-  // Check if the offsets line up for the native data layout of this target.
-  bool NeedBswap = false;
-  bool NeedRotate = false;
-  if (!checkOffsets(DL.isLittleEndian())) {
-    // Special-case: check if byte offsets line up for the opposite endian.
-    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
-      NeedBswap = true;
-    else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
-      NeedRotate = true;
-    else
-      return false;
-  }
-
-  if (NeedBswap &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
-    return false;
-  if (NeedRotate &&
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
-    return false;
-
-  MatchInfo.NeedBSwap = NeedBswap;
-  MatchInfo.NeedRotate = NeedRotate;
-  MatchInfo.LowestIdxStore = LowestIdxStore;
-  MatchInfo.WideSrcVal = WideSrcVal;
-  MatchInfo.FoundStores = std::move(FoundStores);
-  return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
-                                          MergeTruncStoresInfo &MatchInfo) {
-
-  Builder.setInstrAndDebugLoc(MI);
-  Register WideSrcVal = MatchInfo.WideSrcVal;
-  LLT WideStoreTy = MRI.getType(WideSrcVal);
-
-  if (MatchInfo.NeedBSwap) {
-    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
-  } else if (MatchInfo.NeedRotate) {
-    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
-           "Unexpected type for rotate");
-    auto RotAmt =
-        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
-    WideSrcVal =
-        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
-  }
-
-  Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
-                     MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
-                     MatchInfo.LowestIdxStore->getMMO().getAlign());
-
-  // Erase the old stores.
-  for (auto *ST : MatchInfo.FoundStores)
-    ST->eraseFromParent();
-}
-
 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
                                             MachineInstr *&ExtMI) {
   assert(MI.getOpcode() == TargetOpcode::G_PHI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -10,6 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -617,11 +619,304 @@
   return Changed;
 }
 
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+                        MachineRegisterInfo &MRI) {
+  Register TruncVal;
+  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+    return std::nullopt;
+
+  // The shift amount must be a constant multiple of the narrow type.
+  // It is translated to the offset address in the wide source value "y".
+  //
+  // x = G_LSHR y, ShiftAmtC
+  // s8 z = G_TRUNC x
+  // store z, ...
+  Register FoundSrcVal;
+  int64_t ShiftAmt;
+  if (!mi_match(TruncVal, MRI,
+                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+    if (!SrcVal.isValid() || TruncVal == SrcVal) {
+      if (!SrcVal.isValid())
+        SrcVal = TruncVal;
+      return 0; // If it's the lowest index store.
+    }
+    return std::nullopt;
+  }
+
+  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+  if (ShiftAmt % NarrowBits != 0)
+    return std::nullopt;
+  const unsigned Offset = ShiftAmt / NarrowBits;
+
+  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+    return std::nullopt;
+
+  if (!SrcVal.isValid())
+    SrcVal = FoundSrcVal;
+  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+    return std::nullopt;
+  return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+                                   SmallPtrSetImpl<GStore *> &DeletedStores) {
+  LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+  // We only handle merging simple stores of 1-4 bytes.
+  if (!MemTy.isScalar())
+    return false;
+  switch (MemTy.getSizeInBits()) {
+  case 8:
+  case 16:
+  case 32:
+    break;
+  default:
+    return false;
+  }
+  if (!StoreMI.isSimple())
+    return false;
+
+  // We do a simple search for mergeable stores prior to this one.
+  // Any potential alias hazard along the way terminates the search.
+  SmallVector<GStore *> FoundStores;
+
+  // We're looking for:
+  // 1) a (store(trunc(...)))
+  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+  //    the partial value stored.
+  // 3) where the offsets form either a little or big-endian sequence.
+
+  auto &LastStore = StoreMI;
+
+  // The single base pointer that all stores must use.
+  Register BaseReg;
+  int64_t LastOffset;
+  if (!mi_match(LastStore.getPointerReg(), *MRI,
+                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+    BaseReg = LastStore.getPointerReg();
+    LastOffset = 0;
+  }
+
+  GStore *LowestIdxStore = &LastStore;
+  int64_t LowestIdxOffset = LastOffset;
+
+  Register WideSrcVal;
+  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+  if (!LowestShiftAmt)
+    return false; // Didn't match a trunc.
+  assert(WideSrcVal.isValid());
+
+  LLT WideStoreTy = MRI->getType(WideSrcVal);
+  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+    return false;
+  const unsigned NumStoresRequired =
+      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+  OffsetMap[*LowestShiftAmt] = LastOffset;
+  FoundStores.emplace_back(&LastStore);
+
+  const int MaxInstsToCheck = 10;
+  int NumInstsChecked = 0;
+  for (auto II = ++LastStore.getReverseIterator();
+       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+       ++II) {
+    NumInstsChecked++;
+    GStore *NewStore;
+    if ((NewStore = dyn_cast<GStore>(&*II))) {
+      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+        break;
+    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+      break;
+    } else {
+      continue; // This is a safe instruction we can look past.
+    }
+
+    Register NewBaseReg;
+    int64_t MemOffset;
+    // Check we're storing to the same base + some offset.
+    if (!mi_match(NewStore->getPointerReg(), *MRI,
+                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+      NewBaseReg = NewStore->getPointerReg();
+      MemOffset = 0;
+    }
+    if (BaseReg != NewBaseReg)
+      break;
+
+    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+    if (!ShiftByteOffset)
+      break;
+    if (MemOffset < LowestIdxOffset) {
+      LowestIdxOffset = MemOffset;
+      LowestIdxStore = NewStore;
+    }
+
+    // Map the offset in the store and the offset in the combined value, and
+    // early return if it has been set before.
+    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+        OffsetMap[*ShiftByteOffset] != INT64_MAX)
+      break;
+    OffsetMap[*ShiftByteOffset] = MemOffset;
+
+    FoundStores.emplace_back(NewStore);
+    // Reset counter since we've found a matching inst.
+    NumInstsChecked = 0;
+    if (FoundStores.size() == NumStoresRequired)
+      break;
+  }
+
+  if (FoundStores.size() != NumStoresRequired) {
+    if (FoundStores.size() == 1)
+      return false;
+    // We didn't find enough stores to merge into the size of the original
+    // source value, but we may be able to generate a smaller store if we
+    // truncate the source value.
+    WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+  }
+
+  unsigned NumStoresFound = FoundStores.size();
+
+  const auto &DL = LastStore.getMF()->getDataLayout();
+  auto &C = LastStore.getMF()->getFunction().getContext();
+  // Check that a store of the wide type is both allowed and fast on the target
+  unsigned Fast = 0;
+  bool Allowed = TLI->allowsMemoryAccess(
+      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+  if (!Allowed || !Fast)
+    return false;
+
+  // Check if the pieces of the value are going to the expected places in memory
+  // to merge the stores.
+  unsigned NarrowBits = MemTy.getScalarSizeInBits();
+  auto checkOffsets = [&](bool MatchLittleEndian) {
+    if (MatchLittleEndian) {
+      for (unsigned i = 0; i != NumStoresFound; ++i)
+        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+          return false;
+    } else { // MatchBigEndian by reversing loop counter.
+      for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+           ++i, --j)
+        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+          return false;
+    }
+    return true;
+  };
+
+  // Check if the offsets line up for the native data layout of this target.
+  bool NeedBswap = false;
+  bool NeedRotate = false;
+  if (!checkOffsets(DL.isLittleEndian())) {
+    // Special-case: check if byte offsets line up for the opposite endian.
+    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+      NeedBswap = true;
+    else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+      NeedRotate = true;
+    else
+      return false;
+  }
+
+  if (NeedBswap &&
+      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+    return false;
+  if (NeedRotate &&
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+    return false;
+
+  Builder.setInstrAndDebugLoc(StoreMI);
+
+  if (WideStoreTy != MRI->getType(WideSrcVal))
+    WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+  if (NeedBswap) {
+    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+  } else if (NeedRotate) {
+    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+           "Unexpected type for rotate");
+    auto RotAmt =
+        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+    WideSrcVal =
+        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+  }
+
+  Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+                     LowestIdxStore->getMMO().getPointerInfo(),
+                     LowestIdxStore->getMMO().getAlign());
+
+  // Erase the old stores.
+  for (auto *ST : FoundStores) {
+    ST->eraseFromParent();
+    DeletedStores.insert(ST);
+  }
+  return true;
+}
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+  bool Changed = false;
+  SmallVector<GStore *, 16> Stores;
+  SmallPtrSet<GStore *, 8> DeletedStores;
+  // Walk up the block so we can see the most eligible stores.
+  for (MachineInstr &MI : llvm::reverse(BB))
+    if (auto *StoreMI = dyn_cast<GStore>(&MI))
+      Stores.emplace_back(StoreMI);
+
+  for (auto *StoreMI : Stores) {
+    if (DeletedStores.count(StoreMI))
+      continue;
+    if (mergeTruncStore(*StoreMI, DeletedStores))
+      Changed = true;
+  }
+  return Changed;
+}
+
 bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
   bool Changed = false;
-  for (auto &BB : MF)
+  for (auto &BB : MF) {
     Changed |= mergeBlockStores(BB);
+    Changed |= mergeTruncStoresBlock(BB);
+  }
+
+  // Erase all dead instructions left over by the merging.
+  if (Changed) {
+    for (auto &BB : MF) {
+      for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+        if (isTriviallyDead(I, *MRI))
+          I.eraseFromParent();
+      }
+    }
+  }
+
   return Changed;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
@@ -287,13 +287,13 @@
 }
 
 define dso_local void @missing_store(i32 %x, ptr %p) {
+; The missing store of shift 16 means we can't merge to a 32 bit store,
+; but we can still partially merge to a 16 bit one.
; CHECK-LABEL: missing_store:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    lsr w9, w0, #24
-; CHECK-NEXT:    strb w0, [x1]
-; CHECK-NEXT:    strb w8, [x1, #1]
-; CHECK-NEXT:    strb w9, [x1, #3]
+; CHECK-NEXT:    lsr w8, w0, #24
+; CHECK-NEXT:    strh w0, [x1]
+; CHECK-NEXT:    strb w8, [x1, #3]
 ; CHECK-NEXT:    ret
   %t1 = trunc i32 %x to i8
   %sh1 = lshr i32 %x, 8
@@ -339,3 +339,40 @@
   store i8 %t2, ptr %p1, align 1
   ret void
 }
+
+declare void @use_ptr(ptr)
+
+define dso_local void @trunc_from_larger_src_val(i64 %hold.4.lcssa, ptr %check1792) {
+  ; Here we can merge these i8 stores into a single i32 store, but first we need
+  ; to truncate the i64 value to i32.
+; CHECK-LABEL: trunc_from_larger_src_val:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str w0, [sp, #12]
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    bl _use_ptr
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+  %hbuf = alloca [4 x i8], align 1
+  %arrayidx177 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 1
+  %arrayidx234 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 2
+  %arrayidx237 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 3
+  %conv227 = trunc i64 %hold.4.lcssa to i8
+  store i8 %conv227, ptr %hbuf, align 1
+  %shr229 = lshr i64 %hold.4.lcssa, 8
+  %conv230 = trunc i64 %shr229 to i8
+  store i8 %conv230, ptr %arrayidx177, align 1
+  %shr232 = lshr i64 %hold.4.lcssa, 16
+  %conv233 = trunc i64 %shr232 to i8
+  store i8 %conv233, ptr %arrayidx234, align 1
+  %shr235 = lshr i64 %hold.4.lcssa, 24
+  %conv236 = trunc i64 %shr235 to i8
+  store i8 %conv236, ptr %arrayidx237, align 1
+  call void @use_ptr(ptr noundef nonnull %hbuf)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=loadstore-opt -verify-machineinstrs %s -o - | FileCheck %s
 ---
 name:            trunc_i16_to_i8
 alignment:       4
@@ -637,20 +637,14 @@
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-   ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-   ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
-   ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-   ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
-   ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[COPY1]](p0) :: (store (s8))
-   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-   ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8))
-   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
-   ; CHECK-NEXT: G_STORE [[TRUNC2]](s8), [[PTR_ADD1]](p0) :: (store (s8))
+   ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
+   ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+   ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[COPY1]](p0) :: (store (s16), align 1)
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+   ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[PTR_ADD]](p0) :: (store (s8))
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(s32) = COPY $w0
    %1:_(p0) = COPY $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging-debug.mir
@@ -103,31 +103,21 @@
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0, debug-location !11
    ; CHECK-NEXT: DBG_VALUE [[COPY]](p0), $noreg, !9, !DIExpression(), debug-location !11
-   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4, debug-location !DILocation(line: 2, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 2, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5, debug-location !DILocation(line: 3, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C1]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 3, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9, debug-location !DILocation(line: 4, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C2]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 14, debug-location !DILocation(line: 5, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C3]](s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %1:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 2, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %4:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 3, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %7:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %10:_(s16), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !5)
    ; CHECK-NEXT: DBG_VALUE 0, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 6, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2, debug-location !DILocation(line: 7, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C4]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 7, column: 1, scope: !5)
-   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64), debug-location !DILocation(line: 8, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[PTR_ADD]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 8, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %2:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 7, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %3:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 8, column: 1, scope: !5)
    ; CHECK-NEXT: DBG_VALUE 1, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 9, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4, debug-location !DILocation(line: 10, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C5]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 10, column: 1, scope: !5)
-   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64), debug-location !DILocation(line: 11, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[PTR_ADD1]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 11, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %5:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 10, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %6:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 11, column: 1, scope: !5)
    ; CHECK-NEXT: DBG_VALUE 2, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 12, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6, debug-location !DILocation(line: 13, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[C6]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 13, column: 1, scope: !5)
-   ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64), debug-location !DILocation(line: 14, column: 1, scope: !5)
-   ; CHECK-NEXT: DBG_VALUE [[PTR_ADD2]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 14, column: 1, scope: !5)
-   ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
-   ; CHECK-NEXT: G_STORE [[C7]](s64), [[COPY]](p0), debug-location !DILocation(line: 9, scope: !5) :: (store (s64), align 2)
+   ; CHECK-NEXT: DBG_VALUE %8:_(s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 13, column: 1, scope: !5)
+   ; CHECK-NEXT: DBG_VALUE %9:_(p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 14, column: 1, scope: !5)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0), debug-location !DILocation(line: 9, scope: !5) :: (store (s64), align 2)
    ; CHECK-NEXT: DBG_VALUE 3, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 15, column: 1, scope: !5)
    ; CHECK-NEXT: RET_ReallyLR debug-location !DILocation(line: 16, column: 1, scope: !5)
    %0:_(p0) = COPY $x0, debug-location !11
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
@@ -178,14 +178,15 @@
    ; CHECK-LABEL: name: test_simple_2xs8
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
-   ; CHECK: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: G_STORE [[C1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+   ; CHECK-NEXT: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s8) = G_CONSTANT i8 4
    %4:_(s8) = G_CONSTANT i8 5
@@ -211,14 +212,11 @@
    ; CHECK-LABEL: name: test_simple_2xs16
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
-   ; CHECK: G_STORE [[C3]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
+   ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s16) = G_CONSTANT i16 4
    %4:_(s16) = G_CONSTANT i16 5
@@ -244,20 +242,11 @@
    ; CHECK-LABEL: name: test_simple_4xs16
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-   ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
-   ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-   ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-   ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-   ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-   ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
-   ; CHECK: G_STORE [[C7]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3940688328982532
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s16) = G_CONSTANT i16 4
    %4:_(s16) = G_CONSTANT i16 5
@@ -291,14 +280,11 @@
    ; CHECK-LABEL: name: test_simple_2xs32
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
-   ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 4)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 4)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 4
    %4:_(s32) = G_CONSTANT i32 5
@@ -324,14 +310,15 @@
    ; CHECK-LABEL: name: test_simple_2xs64_illegal
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-   ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11)
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11)
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s64) = G_CONSTANT i64 4
    %4:_(s64) = G_CONSTANT i64 5
@@ -357,18 +344,19 @@
    ; CHECK-LABEL: name: test_simple_vector
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
-   ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16)
-   ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-   ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C3]](s16)
-   ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s16>), [[COPY]](p0) :: (store (<2 x s16>) into %ir.addr11)
-   ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-   ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s16>), [[PTR_ADD]](p0) :: (store (<2 x s16>) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
+   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16)
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C3]](s16)
+   ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s16>), [[COPY]](p0) :: (store (<2 x s16>) into %ir.addr11)
+   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+   ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s16>), [[PTR_ADD]](p0) :: (store (<2 x s16>) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %2:_(s16) = G_CONSTANT i16 4
    %3:_(s16) = G_CONSTANT i16 7
@@ -399,17 +387,18 @@
    ; CHECK-LABEL: name: test_unknown_alias
    ; CHECK: liveins: $x0, $x1
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-   ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
-   ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.aliasptr)
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
-   ; CHECK: $w0 = COPY [[LOAD]](s32)
-   ; CHECK: RET_ReallyLR implicit $w0
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+   ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.aliasptr)
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+   ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+   ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %2:_(s32) = G_CONSTANT i32 4
@@ -439,20 +428,18 @@
    ; CHECK-LABEL: name: test_2x_2xs32
    ; CHECK: liveins: $x0, $x1
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-   ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-   ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
-   ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-   ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
-   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
-   ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 73014444041
-   ; CHECK: G_STORE [[C5]](s64), [[COPY1]](p0) :: (store (s64) into %ir.addr32, align 4)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+   ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 73014444041
+   ; CHECK-NEXT: G_STORE [[C3]](s64), [[COPY1]](p0) :: (store (s64) into %ir.addr32, align 4)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %2:_(s32) = G_CONSTANT i32 4
@@ -486,16 +473,17 @@
    ; CHECK-LABEL: name: test_simple_var_2xs8
    ; CHECK: liveins: $w1, $w2, $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-   ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-   ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
-   ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-   ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
-   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-   ; CHECK: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+   ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+   ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+   ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+   ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %3:_(s32) = COPY $w1
    %1:_(s8) = G_TRUNC %3(s32)
@@ -525,16 +513,17 @@
    ; CHECK-LABEL: name: test_simple_var_2xs16
    ; CHECK: liveins: $w1, $w2, $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-   ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-   ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
-   ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-   ; CHECK: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
-   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-   ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+   ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+   ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+   ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+   ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %3:_(s32) = COPY $w1
    %1:_(s16) = G_TRUNC %3(s32)
@@ -564,14 +553,15 @@
    ; CHECK-LABEL: name: test_simple_var_2xs32
    ; CHECK: liveins: $w1, $w2, $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-   ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
-   ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
-   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-   ; CHECK: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+   ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+   ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s32) = COPY $w1
    %2:_(s32) = COPY $w2
@@ -601,25 +591,22 @@
    ; CHECK-LABEL: name: test_alias_4xs16
    ; CHECK: liveins: $x0, $x1
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-   ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-   ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
-   ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-   ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
-   ; CHECK: G_STORE [[C6]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
-   ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-   ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
-   ; CHECK: G_STORE [[C3]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
-   ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-   ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-   ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684
+   ; CHECK-NEXT: G_STORE [[C3]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2)
+   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+   ; CHECK-NEXT: G_STORE [[C]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr3)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+   ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+   ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr4)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %2:_(s16) = G_CONSTANT i16 4
@@ -658,27 +645,24 @@
    ; Here store of 5 and 9 can be merged, others have aliasing barriers.
    ; CHECK-LABEL: name: test_alias2_4xs16
    ; CHECK: liveins: $x0, $x1, $x2
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-   ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-   ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
-   ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
-   ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-   ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
-   ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-   ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 589829
-   ; CHECK: G_STORE [[C7]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2, align 2)
-   ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
-   ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-   ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-   ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+   ; CHECK-NEXT: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
+   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 589829
+   ; CHECK-NEXT: G_STORE [[C4]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2, align 2)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+   ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+   ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr4)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %2:_(p0) = COPY $x2
@@ -722,29 +706,30 @@
    ; CHECK-LABEL: name: test_alias3_4xs16
    ; CHECK: liveins: $x0, $x1, $x2, $x3
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-   ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
-   ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
-   ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-   ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
-   ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-   ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
-   ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
-   ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-   ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
-   ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
-   ; CHECK: G_STORE [[C1]](s16), [[COPY3]](p0) :: (store (s16) into %ir.ptr4)
-   ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-   ; CHECK: G_STORE [[C3]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
-   ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
-   ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-   ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-   ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+   ; CHECK-NEXT: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11)
+   ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3)
+   ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY3]](p0) :: (store (s16) into %ir.ptr4)
+   ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+   ; CHECK-NEXT: G_STORE [[C3]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3)
+   ; CHECK-NEXT: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2)
+   ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+   ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+   ; CHECK-NEXT: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %2:_(p0) = COPY $x2
@@ -790,17 +775,14 @@
    ; CHECK-LABEL: name: test_alias_allocas_2xs32
    ; CHECK: liveins: $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-   ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a1
-   ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a2
-   ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s32) from %ir.a2)
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
-   ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
-   ; CHECK: G_STORE [[C3]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.addr11, align 4)
-   ; CHECK: $w0 = COPY [[LOAD]](s32)
-   ; CHECK: RET_ReallyLR implicit $w0
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a1
+   ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a2
+   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s32) from %ir.a2)
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.addr11, align 4)
+   ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+   ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %3:_(s32) = G_CONSTANT i32 4
    %7:_(s32) = G_CONSTANT i32 5
    %1:_(p0) = G_FRAME_INDEX %stack.0.a1
@@ -829,14 +811,11 @@
    ; CHECK-LABEL: name: test_simple_2xs32_with_align
    ; CHECK: liveins: $x0
-   ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-   ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-   ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
-   ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
-   ; CHECK: RET_ReallyLR
+   ; CHECK-NEXT: {{  $}}
+   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484
+   ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2)
+   ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 4
    %4:_(s32) = G_CONSTANT i32 5
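[Illustrative note, not part of the patch: in mergeTruncStore above, when exactly two narrow pieces match the opposite endianness, the code emits G_ROTR by half the wide type's width rather than G_BSWAP, because swapping two halves is exactly a rotate by half the bit width; a byte swap is only equivalent when the pieces are single bytes. A self-contained C++ check of that identity:]

#include <cassert>
#include <cstdint>

// Rotating a 32-bit value right by half its width swaps its two 16-bit
// halves, which is what storing two s16 pieces in reversed order requires.
constexpr uint32_t rotr32(uint32_t V, unsigned Amt) {
  return (V >> Amt) | (V << (32u - Amt)); // Amt must be in (0, 32).
}

int main() {
  const uint32_t Val = 0xAABBCCDDu;
  // A single s32 store of rotr(Val, 16) writes 0xCCDD at the lower address
  // and 0xAABB at the higher one, matching the original pair of s16 stores.
  assert(rotr32(Val, 16) == 0xCCDDAABBu);
  return 0;
}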