diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h @@ -0,0 +1,165 @@ +//== llvm/CodeGen/GlobalISel/LoadStoreOpt.h - LoadStoreOpt -------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// This is an optimization pass for GlobalISel generic memory operations. +/// Specifically, it focuses on merging stores and loads to consecutive +/// addresses. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H +#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; +class TargetTransformInfo; +namespace GISelAddressing { +/// Helper struct to store a base, index and offset that forms an address +struct BaseIndexOffset { + Register BaseReg; + Register IndexReg; + int64_t Offset = 0; + bool IsIndexSignExt = false; +}; + +/// Returns a BaseIndexOffset which describes the pointer in \p Ptr. +BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI); + +/// Compute whether or not a memory access at \p MI1 aliases with an access at +/// \p MI2 \returns true if either alias/no-alias is known. Sets \p IsAlias +/// accordingly. +bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, + bool &IsAlias, MachineRegisterInfo &MRI); + +/// Returns true if the instruction \p MI may alias \p Other. +/// This function uses multiple strategies to detect aliasing, whereas +/// aliasIsKnownForLoadStore just looks at the addresses of load/stores and is +/// tries to reason about base/index/offsets. +bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, + MachineRegisterInfo &MRI, AliasAnalysis *AA); +} // namespace GISelAddressing + +using namespace GISelAddressing; + +class LoadStoreOpt : public MachineFunctionPass { +public: + static char ID; + +private: + /// An input function to decide if the pass should run or not + /// on the given MachineFunction. + std::function DoNotRunPass; + + MachineRegisterInfo *MRI; + const TargetLowering *TLI; + MachineFunction *MF; + AliasAnalysis *AA; + const LegalizerInfo *LI; + + MachineIRBuilder Builder; + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + + class StoreMergeCandidate { + public: + // The base pointer used as the base for all stores in this candidate. + Register BasePtr; + // Our algorithm is very simple at the moment. We assume that in instruction + // order stores are writing to incremeneting consecutive addresses. So when + // we walk the block in reverse order, the next eligible store must write to + // an offset one store width lower than CurrentLowestOffset. + uint64_t CurrentLowestOffset; + SmallVector Stores; + // A vector of MachineInstr/unsigned pairs to denote potential aliases that + // need to be checked before the candidate is considered safe to merge. The + // unsigned value is an index into the Stores vector. The indexed store is + // the highest-indexed store that has already been checked to not have an + // alias with the instruction. We record this so we don't have to repeat + // alias checks that have been already done, only those with stores added + // after the potential alias is recorded. + SmallVector> PotentialAliases; + + void addPotentialAlias(MachineInstr &MI); + + /// Reset this candidate back to an empty one. + void reset() { + Stores.clear(); + PotentialAliases.clear(); + CurrentLowestOffset = 0; + BasePtr = Register(); + } + }; + + bool isLegalOrBeforeLegalizer(const LegalityQuery &Query, + MachineFunction &MF) const; + /// If the given store is valid to be a member of the candidate, add it and + /// return true. Otherwise, returns false. + bool addStoreToCandidate(GStore &MI, StoreMergeCandidate &C); + /// Returns true if the instruction \p MI would potentially alias with any + /// stores in the candidate \p C. + bool operationAliasesWithCandidate(MachineInstr &MI, StoreMergeCandidate &C); + /// Merges the stores in the given vector into a wide store. + /// \p returns true if at least some of the stores were merged. + /// This may decide not to merge stores if heuristics predict it will not be + /// worth it. + bool mergeStores(SmallVectorImpl &StoresToMerge); + /// Perform a merge of all the stores in \p Stores into a single store. + /// Erases the old stores from the block when finished. + /// \returns true if merging was done. It may fail to perform a merge if + /// there are issues with materializing legal wide values. + bool doSingleStoreMerge(SmallVectorImpl &Stores); + bool processMergeCandidate(StoreMergeCandidate &C); + bool mergeBlockStores(MachineBasicBlock &MBB); + bool mergeFunctionStores(MachineFunction &MF); + + /// Initialize some target-specific data structures for the store merging + /// optimization. \p AddrSpace indicates which address space to use when + /// probing the legalizer info for legal stores. + void initializeStoreMergeTargetInfo(unsigned AddrSpace = 0); + /// A map between address space numbers and a bitvector of supported stores + /// sizes. Each bit in the bitvector represents whether a store size of + /// that bit's value is legal. E.g. if bit 64 is set, then 64 bit scalar + /// stores are legal. + DenseMap LegalStoreSizes; + bool IsPreLegalizer; + /// Contains instructions to be erased at the end of a block scan. + SmallSet InstsToErase; + +public: + LoadStoreOpt(); + LoadStoreOpt(std::function); + + StringRef getPassName() const override { return "LoadStoreOpt"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. + +#endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -236,7 +236,8 @@ void initializeLiveRangeShrinkPass(PassRegistry&); void initializeLiveRegMatrixPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); -void initializeLiveVariablesPass(PassRegistry&); +void initializeLiveVariablesPass(PassRegistry &); +void initializeLoadStoreOptPass(PassRegistry &); void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -17,6 +17,7 @@ LegalizerHelper.cpp LegalizerInfo.cpp LegacyLegalizerInfo.cpp + LoadStoreOpt.cpp Localizer.cpp LostDebugLocObserver.cpp MachineIRBuilder.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp --- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -18,6 +18,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); + initializeLoadStoreOptPass(Registry); initializeLocalizerPass(Registry); initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -0,0 +1,664 @@ +//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the LoadStoreOpt optimization pass. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <_types/_uint64_t.h> +#include +#include + +#define DEBUG_TYPE "loadstore-opt" + +using namespace llvm; +using namespace ore; +using namespace MIPatternMatch; + +STATISTIC(NumStoresMerged, "Number of stores merged"); + +const unsigned MaxStoreSizeToForm = 128; + +char LoadStoreOpt::ID = 0; +INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", + false, false) +INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", + false, false) + +LoadStoreOpt::LoadStoreOpt(std::function F) + : MachineFunctionPass(ID), DoNotRunPass(F) {} + +LoadStoreOpt::LoadStoreOpt() + : LoadStoreOpt([](const MachineFunction &) { return false; }) {} + +void LoadStoreOpt::init(MachineFunction &MF) { + this->MF = &MF; + MRI = &MF.getRegInfo(); + AA = &getAnalysis().getAAResults(); + TLI = MF.getSubtarget().getTargetLowering(); + LI = MF.getSubtarget().getLegalizerInfo(); + Builder.setMF(MF); + IsPreLegalizer = !MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Legalized); + InstsToErase.clear(); +} + +void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr, + MachineRegisterInfo &MRI) { + BaseIndexOffset Info; + Register PtrAddRHS; + if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) { + Info.BaseReg = Ptr; + Info.IndexReg = Register(); + Info.IsIndexSignExt = false; + return Info; + } + + auto RHSCst = getConstantVRegValWithLookThrough(PtrAddRHS, MRI); + if (RHSCst) + Info.Offset = RHSCst->Value.getSExtValue(); + + // Just recognize a simple case for now. In future we'll need to match + // indexing patterns for base + index + constant. + Info.IndexReg = PtrAddRHS; + Info.IsIndexSignExt = false; + return Info; +} + +bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, + const MachineInstr &MI2, + bool &IsAlias, + MachineRegisterInfo &MRI) { + auto *LdSt1 = dyn_cast(&MI1); + auto *LdSt2 = dyn_cast(&MI2); + if (!LdSt1 || !LdSt2) + return false; + + BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI); + BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI); + + if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid()) + return false; + + int64_t Size1 = LdSt1->getMemSize(); + int64_t Size2 = LdSt2->getMemSize(); + + int64_t PtrDiff; + if (BasePtr0.BaseReg == BasePtr1.BaseReg) { + PtrDiff = BasePtr1.Offset - BasePtr0.Offset; + // If the size of memory access is unknown, do not use it to do analysis. + // One example of unknown size memory access is to load/store scalable + // vector objects on the stack. + // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the + // following situations arise: + if (PtrDiff >= 0 && + Size1 != static_cast(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // ========PtrDiff========> + IsAlias = !(Size1 <= PtrDiff); + return true; + } + if (PtrDiff < 0 && + Size2 != static_cast(MemoryLocation::UnknownSize)) { + // [----BasePtr0----] + // [---BasePtr1--] + // =====(-PtrDiff)====> + IsAlias = !((PtrDiff + Size2) <= 0); + return true; + } + return false; + } + + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI); + auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI); + if (!Base0Def || !Base1Def) + return false; // Couldn't tell anything. + + + if (Base0Def->getOpcode() != Base1Def->getOpcode()) + return false; + + if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo(); + // If the bases have the same frame index but we couldn't find a + // constant offset, (indices are different) be conservative. + if (Base0Def != Base1Def && + (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) || + !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) { + IsAlias = false; + return true; + } + } + + // This implementation is a lot more primitive than the SDAG one for now. + // FIXME: what about constant pools? + if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { + auto GV0 = Base0Def->getOperand(1).getGlobal(); + auto GV1 = Base1Def->getOperand(1).getGlobal(); + if (GV0 != GV1) { + IsAlias = false; + return true; + } + } + + // Can't tell anything about aliasing. + return false; +} + +bool GISelAddressing::instMayAlias(const MachineInstr &MI, + const MachineInstr &Other, + MachineRegisterInfo &MRI, + AliasAnalysis *AA) { + struct MemUseCharacteristics { + bool IsVolatile; + bool IsAtomic; + Register BasePtr; + int64_t Offset; + uint64_t NumBytes; + MachineMemOperand *MMO; + }; + + auto getCharacteristics = + [&](const MachineInstr *MI) -> MemUseCharacteristics { + if (const auto *LS = dyn_cast(MI)) { + Register BaseReg; + int64_t Offset = 0; + // No pre/post-inc addressing modes are considered here, unlike in SDAG. + if (!mi_match(LS->getPointerReg(), MRI, + m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) { + BaseReg = LS->getPointerReg(); + Offset = 0; + } + + uint64_t Size = MemoryLocation::getSizeOrUnknown( + LS->getMMO().getMemoryType().getSizeInBytes()); + return {LS->isVolatile(), LS->isAtomic(), BaseReg, + Offset /*base offset*/, Size, &LS->getMMO()}; + } + // FIXME: support recognizing lifetime instructions. + // Default. + return {false /*isvolatile*/, + /*isAtomic*/ false, Register(), + (int64_t)0 /*offset*/, 0 /*size*/, + (MachineMemOperand *)nullptr}; + }; + MemUseCharacteristics MUC0 = getCharacteristics(&MI), + MUC1 = getCharacteristics(&Other); + + // If they are to the same address, then they must be aliases. + if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr && + MUC0.Offset == MUC1.Offset) + return true; + + // If they are both volatile then they cannot be reordered. + if (MUC0.IsVolatile && MUC1.IsVolatile) + return true; + + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. + if (MUC0.MMO && MUC1.MMO) { + if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || + (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) + return false; + } + + // Try to prove that there is aliasing, or that there is no aliasing. Either + // way, we can return now. If nothing can be proved, proceed with more tests. + bool IsAlias; + if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) + return IsAlias; + + // The following all rely on MMO0 and MMO1 being valid. + if (!MUC0.MMO || !MUC1.MMO) + return true; + + // FIXME: port the alignment based alias analysis from SDAG's isAlias(). + int64_t SrcValOffset0 = MUC0.MMO->getOffset(); + int64_t SrcValOffset1 = MUC1.MMO->getOffset(); + uint64_t Size0 = MUC0.NumBytes; + uint64_t Size1 = MUC1.NumBytes; + if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0 != MemoryLocation::UnknownSize && + Size1 != MemoryLocation::UnknownSize) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset; + if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, + MUC0.MMO->getAAInfo()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + MUC1.MMO->getAAInfo()))) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// Returns true if the instruction creates an unavoidable hazard that +/// forces a boundary between store merge candidates. +static bool isInstHardMergeHazard(MachineInstr &MI) { + return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef(); +} + +bool LoadStoreOpt::mergeStores(SmallVectorImpl &StoresToMerge) { + // Try to merge all the stores in the vector, splitting into separate segments + // as necessary. + assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge"); + LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg()); + LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg()); + unsigned AS = PtrTy.getAddressSpace(); + // Ensure the legal store info is computed for this address space. + initializeStoreMergeTargetInfo(AS); + const auto &LegalSizes = LegalStoreSizes[AS]; + +#ifndef NDEBUG + for (auto StoreMI : StoresToMerge) + assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); +#endif + + const auto &DL = MF->getFunction().getParent()->getDataLayout(); + bool AnyMerged = false; + do { + unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size()); + unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize(); + // Compute the biggest store we can generate to handle the number of stores. + unsigned MergeSizeBits; + for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { + LLT StoreTy = LLT::scalar(MergeSizeBits); + EVT StoreEVT = + getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext()); + if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] && + TLI->canMergeStoresTo(AS, StoreEVT, *MF) && + (TLI->isTypeLegal(StoreEVT))) + break; // We can generate a MergeSize bits store. + } + if (MergeSizeBits <= OrigTy.getSizeInBits()) + return AnyMerged; // No greater merge. + + unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits(); + // Perform the actual merging. + SmallVector SingleMergeStores( + StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge); + AnyMerged |= doSingleStoreMerge(SingleMergeStores); + StoresToMerge.erase(StoresToMerge.begin(), + StoresToMerge.begin() + NumStoresToMerge); + } while (StoresToMerge.size() > 1); + return AnyMerged; +} + +bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query, + MachineFunction &MF) const { + auto Action = LI->getAction(Query).Action; + // If the instruction is unsupported, it can't be legalized at all. + if (Action == LegalizeActions::Unsupported) + return false; + return IsPreLegalizer || Action == LegalizeAction::Legal; +} + +bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl &Stores) { + assert(Stores.size() > 1); + // We know that all the stores are consecutive and there are no aliasing + // operations in the range. However, the values that are being stored may be + // generated anywhere before each store. To ensure we have the values + // available, we materialize the wide value and new store at the place of the + // final store in the merge sequence. + + LLT SmallTy = MRI->getType(Stores[0]->getValueReg()); + LLT WideValueTy = + LLT::scalar(Stores.size() * SmallTy.getSizeInBits().getFixedSize()); + + // For each store, compute pairwise merged debug locs. + DebugLoc MergedLoc; + for (unsigned AIdx = 0, BIdx = 1; BIdx < Stores.size(); ++AIdx, ++BIdx) + MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(), + Stores[BIdx]->getDebugLoc()); + Builder.setInstr(*Stores.back()); + Builder.setDebugLoc(MergedLoc); + + // If all of the store values are constants, then create a wide constant + // directly. Otherwise, we need to generate some instructions to merge the + // existing values together into a wider type. + SmallVector ConstantVals; + for (auto Store : Stores) { + auto MaybeCst = + getConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); + if (!MaybeCst) { + ConstantVals.clear(); + break; + } + ConstantVals.emplace_back(MaybeCst->Value); + } + + Register WideReg; + auto *WideMMO = + MF->getMachineMemOperand(&Stores[0]->getMMO(), 0, WideValueTy); + if (ConstantVals.empty()) { + // Mimic the SDAG behaviour here and don't try to do anything for unknown + // values. In future, we should also support the cases of loads and + // extracted vector elements. + return false; + } + + assert(ConstantVals.size() == Stores.size()); + // Check if our wide constant is legal. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF)) + return false; + APInt WideConst(WideValueTy.getSizeInBits(), 0); + for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) { + // Insert the smaller constant into the corresponding position in the + // wider one. + WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits()); + } + WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0); + auto NewStore = + Builder.buildStore(WideReg, Stores[0]->getPointerReg(), *WideMMO); + LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore); + NumStoresMerged += Stores.size(); + + MachineOptimizationRemarkEmitter MORE(*MF, nullptr); + MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore", + Stores[0]->getDebugLoc(), Stores[0]->getParent()); + R << "Merged " << NV("NumMerged", Stores.size()) << " stores of " + << NV("OrigWidth", SmallTy.getSizeInBytes()) + << " bytes into a single store of " + << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes"; + MORE.emit(R); + + for (auto MI : Stores) + InstsToErase.insert(MI); + return true; +} + +bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) { + if (C.Stores.size() < 2) { + C.reset(); + return false; + } + + LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size() + << " stores, starting with " << *C.Stores[0]); + // We know that the stores in the candidate are adjacent. + // Now we need to check if any potential aliasing instructions recorded + // during the search alias with load/stores added to the candidate after. + // For example, if we have the candidate: + // C.Stores = [ST1, ST2, ST3, ST4] + // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or + // ST2, then we would have recorded it into the PotentialAliases structure + // with the associated index value of "1". Then we see ST3 and ST4 and add + // them to the candidate group. We know that LD1 does not alias with ST1 or + // ST2, since we already did that check. However we don't yet know if it + // may alias ST3 and ST4, so we perform those checks now. + SmallVector StoresToMerge; + + auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) { + for (auto AliasInfo : reverse(C.PotentialAliases)) { + MachineInstr *PotentialAliasOp = AliasInfo.first; + unsigned PreCheckedIdx = AliasInfo.second; + if (static_cast(Idx) > PreCheckedIdx) { + // Need to check this alias. + if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, + AA)) { + LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp + << " detected\n"); + return true; + } + } else { + // Once our store index is lower than the index associated with the + // potential alias, we know that we've already checked for this alias + // and all of the earlier potential aliases too. + return false; + } + } + return false; + }; + // Start from the last store in the group, and check if it aliases with any + // of the potential aliasing operations in the list. + for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) { + auto *CheckStore = C.Stores[StoreIdx]; + if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore)) + continue; + StoresToMerge.emplace_back(CheckStore); + } + + LLVM_DEBUG(dbgs() << StoresToMerge.size() + << " stores remaining after alias checks. Merging...\n"); + + // Now we've checked for aliasing hazards, merge any stores left. + C.reset(); + if (StoresToMerge.size() < 2) + return false; + return mergeStores(StoresToMerge); +} + +bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI, + StoreMergeCandidate &C) { + if (C.Stores.empty()) + return false; + return std::any_of(C.Stores.begin(), C.Stores.end(), + [&](MachineInstr *OtherMI) { + return instMayAlias(MI, *OtherMI, *MRI, AA); + }); +} + +void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) { + PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1)); +} + +bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI, + StoreMergeCandidate &C) { + // Check if the given store writes to an adjacent address, and other + // requirements. + LLT ValueTy = MRI->getType(StoreMI.getValueReg()); + LLT PtrTy = MRI->getType(StoreMI.getPointerReg()); + + // Only handle scalars. + if (!ValueTy.isScalar()) + return false; + + // Don't allow truncating stores for now. + if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits()) + return false; + + Register StoreAddr = StoreMI.getPointerReg(); + auto BIO = getPointerInfo(StoreAddr, *MRI); + Register StoreBase = BIO.BaseReg; + uint64_t StoreOffCst = BIO.Offset; + if (C.Stores.empty()) { + // This is the first store of the candidate. + // If the offset can't possible allow for a lower addressed store with the + // same base, don't bother adding it. + if (StoreOffCst < ValueTy.getSizeInBytes()) + return false; + C.BasePtr = StoreBase; + C.CurrentLowestOffset = StoreOffCst; + C.Stores.emplace_back(&StoreMI); + LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: " + << StoreMI); + return true; + } + + // Check the store is the same size as the existing ones in the candidate. + if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() != + ValueTy.getSizeInBits()) + return false; + + if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() != + PtrTy.getAddressSpace()) + return false; + + // There are other stores in the candidate. Check that the store address + // writes to the next lowest adjacent address. + if (C.BasePtr != StoreBase) + return false; + if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) != + static_cast(StoreOffCst)) + return false; + + // This writes to an adjacent address. Allow it. + C.Stores.emplace_back(&StoreMI); + C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes(); + LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI); + return true; +} + +bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) { + bool Changed = false; + // Walk through the block bottom-up, looking for merging candidates. + StoreMergeCandidate Candidate; + for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) { + MachineInstr &MI = *II; + if (InstsToErase.contains(&MI)) + continue; + + if (auto StoreMI = dyn_cast(&*II)) { + // We have a G_STORE. Add it to the candidate if it writes to an adjacent + // address. + if (!addStoreToCandidate(*StoreMI, Candidate)) { + // Store wasn't eligible to be added. May need to record it as a + // potential alias. + if (operationAliasesWithCandidate(*StoreMI, Candidate)) { + Changed |= processMergeCandidate(Candidate); + continue; + } + Candidate.addPotentialAlias(*StoreMI); + } + continue; + } + + // If we don't have any stores yet, this instruction can't pose a problem. + if (Candidate.Stores.empty()) + continue; + + // We're dealing with some other kind of instruction. + if (isInstHardMergeHazard(MI)) { + Changed |= processMergeCandidate(Candidate); + Candidate.Stores.clear(); + continue; + } + + if (!MI.mayLoadOrStore()) + continue; + + if (operationAliasesWithCandidate(MI, Candidate)) { + // We have a potential alias, so process the current candidate if we can + // and then continue looking for a new candidate. + Changed |= processMergeCandidate(Candidate); + continue; + } + + // Record this instruction as a potential alias for future stores that are + // added to the candidate. + Candidate.addPotentialAlias(MI); + } + + // Process any candidate left after finishing searching the entire block. + Changed |= processMergeCandidate(Candidate); + + // Erase instructions now that we're no longer iterating over the block. + for (auto *MI : InstsToErase) + MI->eraseFromParent(); + InstsToErase.clear(); + return Changed; +} + +bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) { + bool Changed = false; + for (auto &BB : MF) { + Changed |= mergeBlockStores(BB); + } + return Changed; +} + +void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { + // Query the legalizer info to record what store types are legal. + // We record this because we don't want to bother trying to merge stores into + // illegal ones, which would just result in being split again. + + if (LegalStoreSizes.count(AddrSpace)) { + assert(LegalStoreSizes[AddrSpace].any()); + return; // Already cached sizes for this address space. + } + + // Need to reserve at least MaxStoreSizeToForm + 1 bits. + BitVector LegalSizes(MaxStoreSizeToForm * 2); + const auto &LI = *MF->getSubtarget().getLegalizerInfo(); + LLT PtrTy = LLT::pointer(AddrSpace, 64); + // We assume that we're not going to be generating any stores wider than + // MaxStoreSizeToForm bits for now. + for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) { + LLT Ty = LLT::scalar(Size); + SmallVector MemDescrs( + {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}}); + SmallVector StoreTys({Ty, PtrTy}); + LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs); + LegalizeActionStep ActionStep = LI.getAction(Q); + if (ActionStep.Action == LegalizeActions::Legal) + LegalSizes.set(Size); + } + assert(LegalSizes.any() && "Expected some store sizes to be legal!"); + LegalStoreSizes[AddrSpace] = LegalSizes; +} + +bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName() + << '\n'); + + init(MF); + bool Changed = false; + Changed |= mergeFunctionStores(MF); + + LegalStoreSizes.clear(); + return Changed; +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MIRParser/MIParser.h" @@ -175,6 +176,16 @@ extern cl::opt EnableHomogeneousPrologEpilog; +static cl::opt EnableGISelLoadStoreOptPreLegal( + "aarch64-enable-gisel-ldst-prelegal", + cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"), + cl::init(true), cl::Hidden); + +static cl::opt EnableGISelLoadStoreOptPostLegal( + "aarch64-enable-gisel-ldst-postlegal", + cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"), + cl::init(false), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine X(getTheAArch64leTarget()); @@ -617,8 +628,11 @@ void AArch64PassConfig::addPreLegalizeMachineIR() { if (getOptLevel() == CodeGenOpt::None) addPass(createAArch64O0PreLegalizerCombiner()); - else + else { addPass(createAArch64PreLegalizerCombiner()); + if (EnableGISelLoadStoreOptPreLegal) + addPass(new LoadStoreOpt()); + } } bool AArch64PassConfig::addLegalizeMachineIR() { @@ -628,8 +642,11 @@ void AArch64PassConfig::addPreRegBankSelect() { bool IsOptNone = getOptLevel() == CodeGenOpt::None; - if (!IsOptNone) + if (!IsOptNone) { addPass(createAArch64PostLegalizerCombiner(IsOptNone)); + if (EnableGISelLoadStoreOptPostLegal) + addPass(new LoadStoreOpt()); + } addPass(createAArch64PostLegalizerLowering()); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -60,6 +60,10 @@ ; ENABLED-O1-NEXT: PreLegalizerCombiner ; VERIFY-O0-NEXT: AArch64O0PreLegalizerCombiner ; VERIFY-NEXT: Verify generated machine code +; ENABLED-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; ENABLED-O1-NEXT: Function Alias Analysis Results +; ENABLED-O1-NEXT: LoadStoreOpt +; ENABLED-O1-NEXT: Analysis containing CSE Info ; VERIFY-O0-NEXT: Analysis containing CSE Info ; ENABLED-NEXT: Legalizer ; VERIFY-NEXT: Verify generated machine code diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 - < %s | FileCheck %s + +define void @test_simple_2xs8(i8 *%ptr) { +; CHECK-LABEL: test_simple_2xs8: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: strb w9, [x0, #1] +; CHECK-NEXT: ret + %addr1 = getelementptr i8, i8 *%ptr, i64 0 + store i8 4, i8 *%addr1 + %addr2 = getelementptr i8, i8 *%ptr, i64 1 + store i8 5, i8 *%addr2 + ret void +} + +define void @test_simple_2xs16(i16 *%ptr) { +; CHECK-LABEL: test_simple_2xs16: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: movk w8, #5, lsl #16 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: ret + %addr1 = getelementptr i16, i16 *%ptr, i64 0 + store i16 4, i16 *%addr1 + %addr2 = getelementptr i16, i16 *%ptr, i64 1 + store i16 5, i16 *%addr2 + ret void +} + +define void @test_simple_2xs32(i32 *%ptr) { +; CHECK-LABEL: test_simple_2xs32: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x8, #4 +; CHECK-NEXT: movk x8, #5, lsl #32 +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ret + %addr1 = getelementptr i32, i32 *%ptr, i64 0 + store i32 4, i32 *%addr1 + %addr2 = getelementptr i32, i32 *%ptr, i64 1 + store i32 5, i32 *%addr2 + ret void +} + +define void @test_simple_2xs64_illegal(i64 *%ptr) { +; CHECK-LABEL: test_simple_2xs64_illegal: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: stp x8, x9, [x0] +; CHECK-NEXT: ret + %addr1 = getelementptr i64, i64 *%ptr, i64 0 + store i64 4, i64 *%addr1 + %addr2 = getelementptr i64, i64 *%ptr, i64 1 + store i64 5, i64 *%addr2 + ret void +} + +; Don't merge vectors...yet. +define void @test_simple_vector(<2 x i16> *%ptr) { +; CHECK-LABEL: test_simple_vector: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: mov w10, #5 +; CHECK-NEXT: mov w11, #8 +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: strh w9, [x0, #2] +; CHECK-NEXT: strh w10, [x0, #4] +; CHECK-NEXT: strh w11, [x0, #6] +; CHECK-NEXT: ret + %addr1 = getelementptr <2 x i16>, <2 x i16> *%ptr, i64 0 + store <2 x i16> , <2 x i16> *%addr1 + %addr2 = getelementptr <2 x i16>, <2 x i16> *%ptr, i64 1 + store <2 x i16> , <2 x i16> *%addr2 + ret void +} + +define i32 @test_unknown_alias(i32 *%ptr, i32 *%aliasptr) { +; CHECK-LABEL: test_unknown_alias: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: ldr w8, [x1] +; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: str w9, [x0, #4] +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %addr1 = getelementptr i32, i32 *%ptr, i64 0 + store i32 4, i32 *%addr1 + %ld = load i32, i32 *%aliasptr + %addr2 = getelementptr i32, i32 *%ptr, i64 1 + store i32 5, i32 *%addr2 + ret i32 %ld +} + +define void @test_2x_2xs32(i32 *%ptr, i32 *%ptr2) { +; CHECK-LABEL: test_2x_2xs32: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x10, #9 +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: movk x10, #17, lsl #32 +; CHECK-NEXT: stp w8, w9, [x0] +; CHECK-NEXT: str x10, [x1] +; CHECK-NEXT: ret + %addr1 = getelementptr i32, i32 *%ptr, i64 0 + store i32 4, i32 *%addr1 + %addr2 = getelementptr i32, i32 *%ptr, i64 1 + store i32 5, i32 *%addr2 + + %addr3 = getelementptr i32, i32 *%ptr2, i64 0 + store i32 9, i32 *%addr3 + %addr4 = getelementptr i32, i32 *%ptr2, i64 1 + store i32 17, i32 *%addr4 + ret void +} + +define void @test_simple_var_2xs8(i8 *%ptr, i8 %v1, i8 %v2) { +; CHECK-LABEL: test_simple_var_2xs8: +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0] +; CHECK-NEXT: strb w2, [x0, #1] +; CHECK-NEXT: ret + %addr1 = getelementptr i8, i8 *%ptr, i64 0 + store i8 %v1, i8 *%addr1 + %addr2 = getelementptr i8, i8 *%ptr, i64 1 + store i8 %v2, i8 *%addr2 + ret void +} + +define void @test_simple_var_2xs16(i16 *%ptr, i16 %v1, i16 %v2) { +; CHECK-LABEL: test_simple_var_2xs16: +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0] +; CHECK-NEXT: strh w2, [x0, #2] +; CHECK-NEXT: ret + %addr1 = getelementptr i16, i16 *%ptr, i64 0 + store i16 %v1, i16 *%addr1 + %addr2 = getelementptr i16, i16 *%ptr, i64 1 + store i16 %v2, i16 *%addr2 + ret void +} + +define void @test_simple_var_2xs32(i32 *%ptr, i32 %v1, i32 %v2) { +; CHECK-LABEL: test_simple_var_2xs32: +; CHECK: ; %bb.0: +; CHECK-NEXT: stp w1, w2, [x0] +; CHECK-NEXT: ret + %addr1 = getelementptr i32, i32 *%ptr, i64 0 + store i32 %v1, i32 *%addr1 + %addr2 = getelementptr i32, i32 *%ptr, i64 1 + store i32 %v2, i32 *%addr2 + ret void +} + + +; The store to ptr2 prevents merging into a single store. +; We can still merge the stores into addr1 and addr2. +define void @test_alias_4xs16(i16 *%ptr, i16 *%ptr2) { +; CHECK-LABEL: test_alias_4xs16: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: movk w8, #5, lsl #16 +; CHECK-NEXT: mov w9, #9 +; CHECK-NEXT: mov w10, #14 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: strh w9, [x0, #4] +; CHECK-NEXT: strh wzr, [x1] +; CHECK-NEXT: strh w10, [x0, #6] +; CHECK-NEXT: ret + %addr1 = getelementptr i16, i16 *%ptr, i64 0 + store i16 4, i16 *%addr1 + %addr2 = getelementptr i16, i16 *%ptr, i64 1 + store i16 5, i16 *%addr2 + %addr3 = getelementptr i16, i16 *%ptr, i64 2 + store i16 9, i16 *%addr3 + store i16 0, i16 *%ptr2 + %addr4 = getelementptr i16, i16 *%ptr, i64 3 + store i16 14, i16 *%addr4 + ret void +} + +; Here store of 5 and 9 can be merged, others have aliasing barriers. +define void @test_alias2_4xs16(i16 *%ptr, i16 *%ptr2, i16* %ptr3) { +; CHECK-LABEL: test_alias2_4xs16: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: movk w9, #9, lsl #16 +; CHECK-NEXT: mov w10, #14 +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: strh wzr, [x2] +; CHECK-NEXT: stur w9, [x0, #2] +; CHECK-NEXT: strh wzr, [x1] +; CHECK-NEXT: strh w10, [x0, #6] +; CHECK-NEXT: ret + %addr1 = getelementptr i16, i16 *%ptr, i64 0 + store i16 4, i16 *%addr1 + %addr2 = getelementptr i16, i16 *%ptr, i64 1 + store i16 0, i16 *%ptr3 + store i16 5, i16 *%addr2 + %addr3 = getelementptr i16, i16 *%ptr, i64 2 + store i16 9, i16 *%addr3 + store i16 0, i16 *%ptr2 + %addr4 = getelementptr i16, i16 *%ptr, i64 3 + store i16 14, i16 *%addr4 + ret void +} + +; No merging can be done here. +define void @test_alias3_4xs16(i16 *%ptr, i16 *%ptr2, i16 *%ptr3, i16 *%ptr4) { +; CHECK-LABEL: test_alias3_4xs16: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: strh wzr, [x2] +; CHECK-NEXT: mov w9, #9 +; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: mov w8, #14 +; CHECK-NEXT: strh wzr, [x3] +; CHECK-NEXT: strh w9, [x0, #4] +; CHECK-NEXT: strh wzr, [x1] +; CHECK-NEXT: strh w8, [x0, #6] +; CHECK-NEXT: ret + %addr1 = getelementptr i16, i16 *%ptr, i64 0 + store i16 4, i16 *%addr1 + %addr2 = getelementptr i16, i16 *%ptr, i64 1 + store i16 0, i16 *%ptr3 + store i16 5, i16 *%addr2 + store i16 0, i16 *%ptr4 + %addr3 = getelementptr i16, i16 *%ptr, i64 2 + store i16 9, i16 *%addr3 + store i16 0, i16 *%ptr2 + %addr4 = getelementptr i16, i16 *%ptr, i64 3 + store i16 14, i16 *%addr4 + ret void +} + +; Can merge because the load is from a different alloca and can't alias. +define i32 @test_alias_allocas_2xs32(i32 *%ptr) { +; CHECK-LABEL: test_alias_allocas_2xs32: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: ldr w0, [sp, #4] +; CHECK-NEXT: mov x8, #4 +; CHECK-NEXT: movk x8, #5, lsl #32 +; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %a1 = alloca [6 x i32] + %a2 = alloca i32, align 4 + %addr1 = getelementptr [6 x i32], [6 x i32] *%a1, i64 0, i32 0 + store i32 4, i32 *%addr1 + %ld = load i32, i32 *%a2 + %addr2 = getelementptr [6 x i32], [6 x i32] *%a1, i64 0, i32 1 + store i32 5, i32 *%addr2 + ret i32 %ld +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir @@ -0,0 +1,790 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=loadstore-opt -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + define void @test_simple_2xs8(i8* %ptr) { + %addr11 = bitcast i8* %ptr to i8* + store i8 4, i8* %addr11, align 1 + %addr2 = getelementptr i8, i8* %ptr, i64 1 + store i8 5, i8* %addr2, align 1 + ret void + } + + define void @test_simple_2xs16(i16* %ptr) { + %addr11 = bitcast i16* %ptr to i16* + store i16 4, i16* %addr11, align 2 + %addr2 = getelementptr i16, i16* %ptr, i64 1 + store i16 5, i16* %addr2, align 2 + ret void + } + + define void @test_simple_2xs32(i32* %ptr) { + %addr11 = bitcast i32* %ptr to i32* + store i32 4, i32* %addr11, align 4 + %addr2 = getelementptr i32, i32* %ptr, i64 1 + store i32 5, i32* %addr2, align 4 + ret void + } + + define void @test_simple_2xs64_illegal(i64* %ptr) { + %addr11 = bitcast i64* %ptr to i64* + store i64 4, i64* %addr11, align 8 + %addr2 = getelementptr i64, i64* %ptr, i64 1 + store i64 5, i64* %addr2, align 8 + ret void + } + + define void @test_simple_vector(<2 x i16>* %ptr) { + %addr11 = bitcast <2 x i16>* %ptr to <2 x i16>* + store <2 x i16> , <2 x i16>* %addr11, align 4 + %addr2 = getelementptr <2 x i16>, <2 x i16>* %ptr, i64 1 + store <2 x i16> , <2 x i16>* %addr2, align 4 + ret void + } + + define i32 @test_unknown_alias(i32* %ptr, i32* %aliasptr) { + %addr11 = bitcast i32* %ptr to i32* + store i32 4, i32* %addr11, align 4 + %ld = load i32, i32* %aliasptr, align 4 + %addr2 = getelementptr i32, i32* %ptr, i64 1 + store i32 5, i32* %addr2, align 4 + ret i32 %ld + } + + define void @test_2x_2xs32(i32* %ptr, i32* %ptr2) { + %addr11 = bitcast i32* %ptr to i32* + store i32 4, i32* %addr11, align 4 + %addr2 = getelementptr i32, i32* %ptr, i64 1 + store i32 5, i32* %addr2, align 4 + %addr32 = bitcast i32* %ptr2 to i32* + store i32 9, i32* %addr32, align 4 + %addr4 = getelementptr i32, i32* %ptr2, i64 1 + store i32 17, i32* %addr4, align 4 + ret void + } + + define void @test_simple_var_2xs8(i8* %ptr, i8 %v1, i8 %v2) { + %addr11 = bitcast i8* %ptr to i8* + store i8 %v1, i8* %addr11, align 1 + %addr2 = getelementptr i8, i8* %ptr, i64 1 + store i8 %v2, i8* %addr2, align 1 + ret void + } + + define void @test_simple_var_2xs16(i16* %ptr, i16 %v1, i16 %v2) { + %addr11 = bitcast i16* %ptr to i16* + store i16 %v1, i16* %addr11, align 2 + %addr2 = getelementptr i16, i16* %ptr, i64 1 + store i16 %v2, i16* %addr2, align 2 + ret void + } + + define void @test_simple_var_2xs32(i32* %ptr, i32 %v1, i32 %v2) { + %addr11 = bitcast i32* %ptr to i32* + store i32 %v1, i32* %addr11, align 4 + %addr2 = getelementptr i32, i32* %ptr, i64 1 + store i32 %v2, i32* %addr2, align 4 + ret void + } + + define void @test_alias_4xs16(i16* %ptr, i16* %ptr2) { + %addr11 = bitcast i16* %ptr to i16* + store i16 4, i16* %addr11, align 2 + %addr2 = getelementptr i16, i16* %ptr, i64 1 + store i16 5, i16* %addr2, align 2 + %addr3 = getelementptr i16, i16* %ptr, i64 2 + store i16 9, i16* %addr3, align 2 + store i16 0, i16* %ptr2, align 2 + %addr4 = getelementptr i16, i16* %ptr, i64 3 + store i16 14, i16* %addr4, align 2 + ret void + } + + define void @test_alias2_4xs16(i16* %ptr, i16* %ptr2, i16* %ptr3) { + %addr11 = bitcast i16* %ptr to i16* + store i16 4, i16* %addr11, align 2 + %addr2 = getelementptr i16, i16* %ptr, i64 1 + store i16 0, i16* %ptr3, align 2 + store i16 5, i16* %addr2, align 2 + %addr3 = getelementptr i16, i16* %ptr, i64 2 + store i16 9, i16* %addr3, align 2 + store i16 0, i16* %ptr2, align 2 + %addr4 = getelementptr i16, i16* %ptr, i64 3 + store i16 14, i16* %addr4, align 2 + ret void + } + + define void @test_alias3_4xs16(i16* %ptr, i16* %ptr2, i16* %ptr3, i16* %ptr4) { + %addr11 = bitcast i16* %ptr to i16* + store i16 4, i16* %addr11, align 2 + %addr2 = getelementptr i16, i16* %ptr, i64 1 + store i16 0, i16* %ptr3, align 2 + store i16 5, i16* %addr2, align 2 + store i16 0, i16* %ptr4, align 2 + %addr3 = getelementptr i16, i16* %ptr, i64 2 + store i16 9, i16* %addr3, align 2 + store i16 0, i16* %ptr2, align 2 + %addr4 = getelementptr i16, i16* %ptr, i64 3 + store i16 14, i16* %addr4, align 2 + ret void + } + + define i32 @test_alias_allocas_2xs32(i32* %ptr) { + %a1 = alloca [6 x i32], align 4 + %a2 = alloca i32, align 4 + %addr11 = bitcast [6 x i32]* %a1 to i32* + store i32 4, i32* %addr11, align 4 + %ld = load i32, i32* %a2, align 4 + %addr2 = getelementptr [6 x i32], [6 x i32]* %a1, i64 0, i32 1 + store i32 5, i32* %addr2, align 4 + ret i32 %ld + } + + define void @test_simple_2xs32_with_align(i32* %ptr) { + %addr11 = bitcast i32* %ptr to i32* + store i32 4, i32* %addr11, align 4 + %addr2 = getelementptr i32, i32* %ptr, i64 1 + store i32 5, i32* %addr2, align 4 + ret void + } + +... +--- +name: test_simple_2xs8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_2xs8 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK: G_STORE [[C]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[C1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s8) = G_CONSTANT i8 4 + %4:_(s8) = G_CONSTANT i8 5 + G_STORE %1(s8), %0(p0) :: (store (s8) into %ir.addr11) + %2:_(s64) = G_CONSTANT i64 1 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %4(s8), %3(p0) :: (store (s8) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_2xs16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_2xs16 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684 + ; CHECK: G_STORE [[C3]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s16) = G_CONSTANT i16 4 + %4:_(s16) = G_CONSTANT i16 5 + G_STORE %1(s16), %0(p0) :: (store (s16) into %ir.addr11) + %2:_(s64) = G_CONSTANT i64 2 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %4(s16), %3(p0) :: (store (s16) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_2xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_2xs32 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484 + ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 4 + %4:_(s32) = G_CONSTANT i32 5 + G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.addr11) + %2:_(s64) = G_CONSTANT i64 4 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %4(s32), %3(p0) :: (store (s32) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_2xs64_illegal +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_2xs64_illegal + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 4 + %4:_(s64) = G_CONSTANT i64 5 + G_STORE %1(s64), %0(p0) :: (store (s64) into %ir.addr11) + %2:_(s64) = G_CONSTANT i64 8 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %4(s64), %3(p0) :: (store (s64) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_vector +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_vector + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C3]](s16) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s16>), [[COPY]](p0) :: (store (<2 x s16>) into %ir.addr11) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s16>), [[PTR_ADD]](p0) :: (store (<2 x s16>) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %2:_(s16) = G_CONSTANT i16 4 + %3:_(s16) = G_CONSTANT i16 7 + %1:_(<2 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16) + %7:_(s16) = G_CONSTANT i16 5 + %8:_(s16) = G_CONSTANT i16 8 + %6:_(<2 x s16>) = G_BUILD_VECTOR %7(s16), %8(s16) + G_STORE %1(<2 x s16>), %0(p0) :: (store (<2 x s16>) into %ir.addr11) + %4:_(s64) = G_CONSTANT i64 4 + %5:_(p0) = G_PTR_ADD %0, %4(s64) + G_STORE %6(<2 x s16>), %5(p0) :: (store (<2 x s16>) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_unknown_alias +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_unknown_alias + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.aliasptr) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s32) = G_CONSTANT i32 4 + %6:_(s32) = G_CONSTANT i32 5 + G_STORE %2(s32), %0(p0) :: (store (s32) into %ir.addr11) + %3:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.aliasptr) + %4:_(s64) = G_CONSTANT i64 4 + %5:_(p0) = G_PTR_ADD %0, %4(s64) + G_STORE %6(s32), %5(p0) :: (store (s32) into %ir.addr2) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_2x_2xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_2x_2xs32 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 73014444041 + ; CHECK: G_STORE [[C5]](s64), [[COPY1]](p0) :: (store (s64) into %ir.addr32, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s32) = G_CONSTANT i32 4 + %5:_(s32) = G_CONSTANT i32 5 + %6:_(s32) = G_CONSTANT i32 9 + %8:_(s32) = G_CONSTANT i32 17 + G_STORE %2(s32), %0(p0) :: (store (s32) into %ir.addr11) + %3:_(s64) = G_CONSTANT i64 4 + %4:_(p0) = G_PTR_ADD %0, %3(s64) + G_STORE %5(s32), %4(p0) :: (store (s32) into %ir.addr2) + G_STORE %6(s32), %1(p0) :: (store (s32) into %ir.addr32) + %7:_(p0) = G_PTR_ADD %1, %3(s64) + G_STORE %8(s32), %7(p0) :: (store (s32) into %ir.addr4) + RET_ReallyLR + +... +--- +name: test_simple_var_2xs8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $w1, $w2, $x0 + + ; CHECK-LABEL: name: test_simple_var_2xs8 + ; CHECK: liveins: $w1, $w2, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr11) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[TRUNC1]](s8), [[PTR_ADD]](p0) :: (store (s8) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w2 + %2:_(s8) = G_TRUNC %4(s32) + G_STORE %1(s8), %0(p0) :: (store (s8) into %ir.addr11) + %5:_(s64) = G_CONSTANT i64 1 + %6:_(p0) = G_PTR_ADD %0, %5(s64) + G_STORE %2(s8), %6(p0) :: (store (s8) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_var_2xs16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $w1, $w2, $x0 + + ; CHECK-LABEL: name: test_simple_var_2xs16 + ; CHECK: liveins: $w1, $w2, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w2 + %2:_(s16) = G_TRUNC %4(s32) + G_STORE %1(s16), %0(p0) :: (store (s16) into %ir.addr11) + %5:_(s64) = G_CONSTANT i64 2 + %6:_(p0) = G_PTR_ADD %0, %5(s64) + G_STORE %2(s16), %6(p0) :: (store (s16) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_simple_var_2xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $w1, $w2, $x0 + + ; CHECK-LABEL: name: test_simple_var_2xs32 + ; CHECK: liveins: $w1, $w2, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(s32) = COPY $w2 + G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.addr11) + %3:_(s64) = G_CONSTANT i64 4 + %4:_(p0) = G_PTR_ADD %0, %3(s64) + G_STORE %2(s32), %4(p0) :: (store (s32) into %ir.addr2) + RET_ReallyLR + +... +--- +name: test_alias_4xs16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1 + + ; The store to ptr2 prevents merging into a single store. + ; We can still merge the stores into addr1 and addr2. + + ; CHECK-LABEL: name: test_alias_4xs16 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 327684 + ; CHECK: G_STORE [[C6]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr11, align 2) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3) + ; CHECK: G_STORE [[C3]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2) + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) + ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s16) = G_CONSTANT i16 4 + %5:_(s16) = G_CONSTANT i16 5 + %8:_(s16) = G_CONSTANT i16 9 + %9:_(s16) = G_CONSTANT i16 0 + %12:_(s16) = G_CONSTANT i16 14 + G_STORE %2(s16), %0(p0) :: (store (s16) into %ir.addr11) + %3:_(s64) = G_CONSTANT i64 2 + %4:_(p0) = G_PTR_ADD %0, %3(s64) + G_STORE %5(s16), %4(p0) :: (store (s16) into %ir.addr2) + %6:_(s64) = G_CONSTANT i64 4 + %7:_(p0) = G_PTR_ADD %0, %6(s64) + G_STORE %8(s16), %7(p0) :: (store (s16) into %ir.addr3) + G_STORE %9(s16), %1(p0) :: (store (s16) into %ir.ptr2) + %10:_(s64) = G_CONSTANT i64 6 + %11:_(p0) = G_PTR_ADD %0, %10(s64) + G_STORE %12(s16), %11(p0) :: (store (s16) into %ir.addr4) + RET_ReallyLR + +... +--- +name: test_alias2_4xs16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1, $x2 + ; Here store of 5 and 9 can be merged, others have aliasing barriers. + ; CHECK-LABEL: name: test_alias2_4xs16 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 + ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 589829 + ; CHECK: G_STORE [[C7]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.addr2, align 2) + ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2) + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) + ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s16) = G_CONSTANT i16 4 + %6:_(s16) = G_CONSTANT i16 0 + %7:_(s16) = G_CONSTANT i16 5 + %10:_(s16) = G_CONSTANT i16 9 + %13:_(s16) = G_CONSTANT i16 14 + G_STORE %3(s16), %0(p0) :: (store (s16) into %ir.addr11) + %4:_(s64) = G_CONSTANT i64 2 + %5:_(p0) = G_PTR_ADD %0, %4(s64) + G_STORE %6(s16), %2(p0) :: (store (s16) into %ir.ptr3) + G_STORE %7(s16), %5(p0) :: (store (s16) into %ir.addr2) + %8:_(s64) = G_CONSTANT i64 4 + %9:_(p0) = G_PTR_ADD %0, %8(s64) + G_STORE %10(s16), %9(p0) :: (store (s16) into %ir.addr3) + G_STORE %6(s16), %1(p0) :: (store (s16) into %ir.ptr2) + %11:_(s64) = G_CONSTANT i64 6 + %12:_(p0) = G_PTR_ADD %0, %11(s64) + G_STORE %13(s16), %12(p0) :: (store (s16) into %ir.addr4) + RET_ReallyLR + +... +--- +name: test_alias3_4xs16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0, $x1, $x2, $x3 + + ; No merging can be done here. + + ; CHECK-LABEL: name: test_alias3_4xs16 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 + ; CHECK: G_STORE [[C]](s16), [[COPY]](p0) :: (store (s16) into %ir.addr11) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[C1]](s16), [[COPY2]](p0) :: (store (s16) into %ir.ptr3) + ; CHECK: G_STORE [[C2]](s16), [[PTR_ADD]](p0) :: (store (s16) into %ir.addr2) + ; CHECK: G_STORE [[C1]](s16), [[COPY3]](p0) :: (store (s16) into %ir.ptr4) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: G_STORE [[C3]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.addr3) + ; CHECK: G_STORE [[C1]](s16), [[COPY1]](p0) :: (store (s16) into %ir.ptr2) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[C4]](s16), [[PTR_ADD2]](p0) :: (store (s16) into %ir.addr4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(p0) = COPY $x3 + %4:_(s16) = G_CONSTANT i16 4 + %7:_(s16) = G_CONSTANT i16 0 + %8:_(s16) = G_CONSTANT i16 5 + %11:_(s16) = G_CONSTANT i16 9 + %14:_(s16) = G_CONSTANT i16 14 + G_STORE %4(s16), %0(p0) :: (store (s16) into %ir.addr11) + %5:_(s64) = G_CONSTANT i64 2 + %6:_(p0) = G_PTR_ADD %0, %5(s64) + G_STORE %7(s16), %2(p0) :: (store (s16) into %ir.ptr3) + G_STORE %8(s16), %6(p0) :: (store (s16) into %ir.addr2) + G_STORE %7(s16), %3(p0) :: (store (s16) into %ir.ptr4) + %9:_(s64) = G_CONSTANT i64 4 + %10:_(p0) = G_PTR_ADD %0, %9(s64) + G_STORE %11(s16), %10(p0) :: (store (s16) into %ir.addr3) + G_STORE %7(s16), %1(p0) :: (store (s16) into %ir.ptr2) + %12:_(s64) = G_CONSTANT i64 6 + %13:_(p0) = G_PTR_ADD %0, %12(s64) + G_STORE %14(s16), %13(p0) :: (store (s16) into %ir.addr4) + RET_ReallyLR + +... +--- +name: test_alias_allocas_2xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, name: a1, size: 24, alignment: 4 } + - { id: 1, name: a2, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; Can merge because the load is from a different alloca and can't alias. + + ; CHECK-LABEL: name: test_alias_allocas_2xs32 + ; CHECK: liveins: $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a1 + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s32) from %ir.a2) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484 + ; CHECK: G_STORE [[C3]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.addr11, align 4) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %3:_(s32) = G_CONSTANT i32 4 + %7:_(s32) = G_CONSTANT i32 5 + %1:_(p0) = G_FRAME_INDEX %stack.0.a1 + %2:_(p0) = G_FRAME_INDEX %stack.1.a2 + G_STORE %3(s32), %1(p0) :: (store (s32) into %ir.addr11) + %4:_(s32) = G_LOAD %2(p0) :: (dereferenceable load (s32) from %ir.a2) + %5:_(s64) = G_CONSTANT i64 4 + %6:_(p0) = G_PTR_ADD %1, %5(s64) + G_STORE %7(s32), %6(p0) :: (store (s32) into %ir.addr2) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_simple_2xs32_with_align +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1 (%ir-block.0): + liveins: $x0 + + ; CHECK-LABEL: name: test_simple_2xs32_with_align + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 21474836484 + ; CHECK: G_STORE [[C3]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr11, align 2) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(s32) = G_CONSTANT i32 4 + %4:_(s32) = G_CONSTANT i32 5 + G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.addr11, align 2) + %2:_(s64) = G_CONSTANT i64 4 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %4(s32), %3(p0) :: (store (s32) into %ir.addr2, align 2) + RET_ReallyLR + +... diff --git a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt --- a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt @@ -22,4 +22,5 @@ KnownBitsTest.cpp KnownBitsVectorTest.cpp GISelUtilsTest.cpp + GISelAliasTest.cpp ) diff --git a/llvm/unittests/CodeGen/GlobalISel/GISelAliasTest.cpp b/llvm/unittests/CodeGen/GlobalISel/GISelAliasTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/CodeGen/GlobalISel/GISelAliasTest.cpp @@ -0,0 +1,144 @@ +//===- GISelAliasTest.cpp--------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GISelMITest.h" +#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/AtomicOrdering.h" +#include "gtest/gtest.h" + +namespace { + +// Test simple aliasing. +TEST_F(AArch64GISelMITest, SimpleAlias) { + setUp(); + if (!TM) + return; + + LLT S64 = LLT::scalar(64); + LLT P0 = LLT::pointer(0, 64); + + auto Base = B.buildIntToPtr(P0, Copies[0]); + auto Base2 = B.buildIntToPtr(P0, Copies[1]); + // These two addresses are identical. + auto Addr = B.buildPtrAdd(P0, Base, B.buildConstant(S64, 8)); + auto Addr2 = B.buildPtrAdd(P0, Base, B.buildConstant(S64, 8)); + + MachinePointerInfo PtrInfo; + auto *LoadMMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::Flags::MOLoad, S64, Align()); + auto Ld1 = B.buildLoad(S64, Addr, *LoadMMO); + auto Ld2 = B.buildLoad(S64, Addr2, *LoadMMO); + + // We expect the same address to return alias. + EXPECT_TRUE(GISelAddressing::instMayAlias(*Ld1, *Ld2, *MRI, nullptr)); + + // Expect both being volatile to say alias, since we can't reorder them. + auto *LoadVolMMO = MF->getMachineMemOperand( + LoadMMO, + MachineMemOperand::Flags::MOLoad | MachineMemOperand::Flags::MOVolatile); + // Pick a different address so we don't trivially match the alias case above. + auto VolLd1 = B.buildLoad(S64, Addr, *LoadVolMMO); + auto VolLd2 = B.buildLoad(S64, Base2, *LoadVolMMO); + EXPECT_TRUE(GISelAddressing::instMayAlias(*VolLd1, *VolLd2, *MRI, nullptr)); + + // Same for atomics. + auto *LoadAtomicMMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::Flags::MOLoad, S64, Align(8), AAMDNodes(), + nullptr, SyncScope::System, AtomicOrdering::Acquire); + auto AtomicLd1 = B.buildLoad(S64, Addr, *LoadAtomicMMO); + auto AtomicLd2 = B.buildLoad(S64, Base2, *LoadAtomicMMO); + EXPECT_TRUE( + GISelAddressing::instMayAlias(*AtomicLd1, *AtomicLd2, *MRI, nullptr)); + + // Invariant memory with stores. + auto *LoadInvariantMMO = MF->getMachineMemOperand( + LoadMMO, + MachineMemOperand::Flags::MOLoad | MachineMemOperand::Flags::MOInvariant); + auto InvariantLd = B.buildLoad(S64, Addr, *LoadInvariantMMO); + auto Store = B.buildStore(B.buildConstant(S64, 0), Base2, PtrInfo, Align()); + EXPECT_FALSE( + GISelAddressing::instMayAlias(*InvariantLd, *Store, *MRI, nullptr)); +} + +// Test aliasing checks for same base + different offsets. +TEST_F(AArch64GISelMITest, OffsetAliasing) { + setUp(); + if (!TM) + return; + + LLT S64 = LLT::scalar(64); + LLT P0 = LLT::pointer(0, 64); + + auto Base = B.buildIntToPtr(P0, Copies[0]); + auto Addr = B.buildPtrAdd(P0, Base, B.buildConstant(S64, 8)); + auto Addr2 = B.buildPtrAdd(P0, Base, B.buildConstant(S64, 16)); + + MachinePointerInfo PtrInfo; + auto *LoadMMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::Flags::MOLoad, S64, Align()); + auto Ld1 = B.buildLoad(S64, Addr, *LoadMMO); + auto Ld2 = B.buildLoad(S64, Addr2, *LoadMMO); + + // The offset between the two addresses is >= than the size of access. + // Can't alias. + EXPECT_FALSE(GISelAddressing::instMayAlias(*Ld1, *Ld2, *MRI, nullptr)); + EXPECT_FALSE(GISelAddressing::instMayAlias(*Ld2, *Ld1, *MRI, nullptr)); + + auto Addr3 = B.buildPtrAdd(P0, Base, B.buildConstant(S64, 4)); + auto Ld3 = B.buildLoad(S64, Addr3, *LoadMMO); + // Offset of 4 is < the size of access, 8 bytes. + EXPECT_TRUE(GISelAddressing::instMayAlias(*Ld1, *Ld3, *MRI, nullptr)); +} + +// Test aliasing checks for frame indexes. +TEST_F(AArch64GISelMITest, FrameIndexAliasing) { + setUp(); + if (!TM) + return; + + LLT S64 = LLT::scalar(64); + LLT P0 = LLT::pointer(0, 64); + + auto &MFI = MF->getFrameInfo(); + auto FixedFI1 = MFI.CreateFixedObject(8, 0, true); + auto FixedFI2 = MFI.CreateFixedObject(8, 8, true); + + auto FI1 = MFI.CreateStackObject(8, Align(8), false); + auto GFI1 = B.buildFrameIndex(P0, FI1); + // This G_FRAME_INDEX is separate but refers to the same index. + auto GFI2 = B.buildFrameIndex(P0, FI1); + + MachinePointerInfo PtrInfo; + auto *LoadMMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::Flags::MOLoad, S64, Align()); + auto Ld1 = B.buildLoad(S64, GFI1, *LoadMMO); + auto Ld2 = B.buildLoad(S64, GFI2, *LoadMMO); + + // The offset between the two addresses is >= than the size of access. + // Can't alias. + EXPECT_FALSE(GISelAddressing::instMayAlias(*Ld1, *Ld2, *MRI, nullptr)); + + + auto GFixedFI1 = B.buildFrameIndex(P0, FixedFI1); + auto GFixedFI2 = B.buildFrameIndex(P0, FixedFI2); + auto FixedFILd1 = B.buildLoad(S64, GFixedFI1, *LoadMMO); + auto FixedFILd2 = B.buildLoad(S64, GFixedFI2, *LoadMMO); + // If we have two different FrameIndex bases, but at least one is not a fixed + // object, then we can say they don't alias. If both were fixed, then we could + // have multiple frameindex slots being accessed at once since their relative + // positions are known. However, if one is not fixed, then they can't alias + // because non-fixed FIs are only given offsets during PEI. + EXPECT_FALSE(GISelAddressing::instMayAlias(*FixedFILd1, *Ld1, *MRI, nullptr)); + EXPECT_TRUE( + GISelAddressing::instMayAlias(*FixedFILd1, *FixedFILd2, *MRI, nullptr)); +} + +} // namespace