Index: llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -569,6 +569,7 @@ /// Current optimization remark emitter. Used to report failures. std::unique_ptr ORE; + AAResults *AA; FunctionLoweringInfo FuncInfo; // True when either the Target Machine specifies no optimizations or the Index: llvm/include/llvm/CodeGen/LiveIntervals.h =================================================================== --- llvm/include/llvm/CodeGen/LiveIntervals.h +++ llvm/include/llvm/CodeGen/LiveIntervals.h @@ -55,8 +55,7 @@ MachineFunction* MF; MachineRegisterInfo* MRI; const TargetRegisterInfo* TRI; - const TargetInstrInfo* TII; - AAResults *AA; + const TargetInstrInfo *TII; SlotIndexes* Indexes; MachineDominatorTree *DomTree = nullptr; LiveIntervalCalc *LICalc = nullptr; @@ -212,10 +211,6 @@ return Indexes; } - AAResults *getAliasAnalysis() const { - return AA; - } - /// Returns true if the specified machine instr has been removed or was /// never entered in the map. bool isNotInMIMap(const MachineInstr &Instr) const { Index: llvm/include/llvm/CodeGen/LiveRangeEdit.h =================================================================== --- llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -32,7 +32,6 @@ namespace llvm { -class AAResults; class LiveIntervals; class MachineInstr; class MachineOperand; @@ -93,7 +92,7 @@ SmallPtrSet Rematted; /// scanRemattable - Identify the Parent values that may rematerialize. - void scanRemattable(AAResults *aa); + void scanRemattable(); /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. @@ -103,8 +102,7 @@ SmallPtrSet>; /// Helper for eliminateDeadDefs. - void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA); + void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink); /// MachineRegisterInfo callback to notify when new virtual /// registers are created. @@ -184,12 +182,11 @@ /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AAResults *); + bool anyRematerializable(); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AAResults *); + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI); /// Remat - Information needed to rematerialize at a specific location. struct Remat { @@ -242,8 +239,7 @@ /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled = None, - AAResults *AA = nullptr); + ArrayRef RegsBeingSpilled = None); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. Index: llvm/include/llvm/CodeGen/MachineInstr.h =================================================================== --- llvm/include/llvm/CodeGen/MachineInstr.h +++ llvm/include/llvm/CodeGen/MachineInstr.h @@ -1620,7 +1620,7 @@ /// argument area of a function (if it does not change). 
If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AAResults *AA) const; + bool isDereferenceableInvariantLoad() const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. Index: llvm/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAG.h +++ llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1043,13 +1043,15 @@ bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -121,12 +121,11 @@ /// This means the only allowed uses are constants and unallocatable physical /// registers so that the instructions result is independent of the place /// in the function. - bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA = nullptr) const { + bool isTriviallyReMaterializable(const MachineInstr &MI) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && - (isReallyTriviallyReMaterializable(MI, AA) || - isReallyTriviallyReMaterializableGeneric(MI, AA))); + (isReallyTriviallyReMaterializable(MI) || + isReallyTriviallyReMaterializableGeneric(MI))); } /// Given \p MO is a PhysReg use return if it can be ignored for the purpose @@ -143,8 +142,7 @@ /// than producing a value, or if it requres any address registers that are /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . - virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { + virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const { return false; } @@ -186,8 +184,7 @@ /// set and the target hook isReallyTriviallyReMaterializable returns false, /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. 
- bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AAResults *AA) const; + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI) const; public: /// These methods return the opcode of the frame setup/destroy instructions Index: llvm/lib/CodeGen/CalcSpillWeights.cpp =================================================================== --- llvm/lib/CodeGen/CalcSpillWeights.cpp +++ llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -121,7 +121,7 @@ assert(MI && "Dead valno in interval"); } - if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) + if (!TII.isTriviallyReMaterializable(*MI)) return false; } return true; Index: llvm/lib/CodeGen/EarlyIfConversion.cpp =================================================================== --- llvm/lib/CodeGen/EarlyIfConversion.cpp +++ llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -576,7 +576,7 @@ // If the instruction could modify memory, or there may be some intervening // store between the two, we can't consider them to be equal. - if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr)) + if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) return false; // We also can't guarantee that they are the same if, for example, the Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2363,7 +2363,7 @@ // loading from. To be safe, let's just assume that all loads and stores // are different (unless we have something which is guaranteed to not // change.) - if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) + if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad()) return false; // If both instructions are loads or stores, they are equal only if both @@ -2374,7 +2374,7 @@ if (!LS1 || !LS2) return false; - if (!I2->isDereferenceableInvariantLoad(nullptr) || + if (!I2->isDereferenceableInvariantLoad() || (LS1->getMemSizeInBits() != LS2->getMemSizeInBits())) return false; } Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" @@ -166,8 +167,10 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { AU.addRequired(); + AU.addRequired(); + } AU.addRequired(); AU.addPreserved(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -1275,26 +1278,41 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast(U); - if (DL->getTypeStoreSize(LI.getType()) == 0) + + unsigned StoreSize = DL->getTypeStoreSize(LI.getType()); + if (StoreSize == 0) return true; ArrayRef Regs = getOrCreateVRegs(LI); ArrayRef Offsets = *VMap.getOffsets(LI); Register Base = getOrCreateVReg(*LI.getPointerOperand()); + AAMDNodes AAInfo = LI.getAAMetadata(); - Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType()); + const Value *Ptr = LI.getPointerOperand(); + Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType()); LLT OffsetTy = 
getLLTForType(*OffsetIRTy, *DL); - if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) { + if (CLI->supportSwiftError() && isSwiftError(Ptr)) { assert(Regs.size() == 1 && "swifterror should be single pointer"); - Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), - LI.getPointerOperand()); + Register VReg = + SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr); MIRBuilder.buildCopy(Regs[0], VReg); return true; } auto &TLI = *MF->getSubtarget().getTargetLowering(); MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + if (AA && !(Flags & MachineMemOperand::MOInvariant)) { + if (AA->pointsToConstantMemory( + MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) { + Flags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + Flags |= MachineMemOperand::MODereferenceable; + } + } const MDNode *Ranges = Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; @@ -1306,7 +1324,7 @@ Align BaseAlign = getMemOpAlign(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, MRI->getType(Regs[i]), - commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges, + commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1563,9 +1581,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, unsigned Opcode) { - + const Value *SrcPtr = CI.getArgOperand(1); // If the source is undef, then just emit a nop. - if (isa(CI.getArgOperand(1))) + if (isa(SrcPtr)) return true; SmallVector SrcRegs; @@ -1595,15 +1613,20 @@ unsigned IsVol = cast(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue(); + ConstantInt *CopySize = nullptr; + if (auto *MCI = dyn_cast(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast(MCI->getArgOperand(2)); } else if (auto *MCI = dyn_cast(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast(MCI->getArgOperand(2)); } else if (auto *MMI = dyn_cast(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast(MMI->getArgOperand(2)); } else { auto *MSI = cast(&CI); DstAlign = MSI->getDestAlign().valueOrOne(); @@ -1617,14 +1640,31 @@ } // Create mem operands to store the alignment and volatile info. - auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(0)), - MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad; + MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore; + if (IsVol) { + LoadFlags |= MachineMemOperand::MOVolatile; + StoreFlags |= MachineMemOperand::MOVolatile; + } + + AAMDNodes AAInfo = CI.getAAMetadata(); + if (AA && CopySize && + AA->pointsToConstantMemory(MemoryLocation( + SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) { + LoadFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. 
+ LoadFlags |= MachineMemOperand::MODereferenceable; + } + + ICall.addMemOperand( + MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)), + StoreFlags, 1, DstAlign, AAInfo)); if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(1)), - MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo)); return true; } @@ -3336,10 +3376,13 @@ TM.resetTargetOptions(F); EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - if (EnableOpts) + if (EnableOpts) { + AA = &getAnalysis().getAAResults(); FuncInfo.BPI = &getAnalysis().getBPI(); - else + } else { + AA = nullptr; FuncInfo.BPI = nullptr; + } FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); Index: llvm/lib/CodeGen/InlineSpiller.cpp =================================================================== --- llvm/lib/CodeGen/InlineSpiller.cpp +++ llvm/lib/CodeGen/InlineSpiller.cpp @@ -86,7 +86,6 @@ MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -140,7 +139,6 @@ VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis()), LSS(pass.getAnalysis()), - AA(&pass.getAnalysis().getAAResults()), MDT(pass.getAnalysis()), Loops(pass.getAnalysis()), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), @@ -159,7 +157,6 @@ MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -200,7 +197,6 @@ VirtRegAuxInfo &VRAI) : MF(MF), LIS(Pass.getAnalysis()), LSS(Pass.getAnalysis()), - AA(&Pass.getAnalysis().getAAResults()), MDT(Pass.getAnalysis()), Loops(Pass.getAnalysis()), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), @@ -659,7 +655,7 @@ /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { - if (!Edit->anyRematerializable(AA)) + if (!Edit->anyRematerializable()) return; UsedValues.clear(); @@ -702,7 +698,7 @@ if (DeadDefs.empty()) return; LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions // after rematerialization. To remove a VNI for a vreg from its LiveInterval, @@ -1180,7 +1176,7 @@ // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. 
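[Note, not part of the patch] The IRTranslator.cpp hunks above capture the intent of the whole change: the alias-analysis query moves to translation time, where the translator asks AA->pointsToConstantMemory once and records the answer as MachineMemOperand flags, so later passes never need an AAResults pointer. A minimal sketch of that pattern follows; it is illustrative only and reuses the names from translateLoad (AA, LI, DL, TLI) under the assumption they are in scope as in the patch.

  // Illustrative sketch: record AA-derived invariance on the memory operand
  // at translation time so downstream passes only inspect the MachineInstr.
  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
  if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
    MemoryLocation Loc(LI.getPointerOperand(),
                       LocationSize::precise(DL->getTypeStoreSize(LI.getType())),
                       LI.getAAMetadata());
    if (AA->pointsToConstantMemory(Loc)) {
      // As the FIXME in the patch notes, constant memory does not strictly
      // imply dereferenceability; setting both flags preserves the previous
      // behaviour of the AA-based check.
      Flags |= MachineMemOperand::MOInvariant;
      Flags |= MachineMemOperand::MODereferenceable;
    }
  }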
@@ -1617,7 +1613,7 @@ RMEnt->removeOperand(i - 1); } } - Edit.eliminateDeadDefs(SpillsToRm, None, AA); + Edit.eliminateDeadDefs(SpillsToRm, None); } } Index: llvm/lib/CodeGen/LiveIntervals.cpp =================================================================== --- llvm/lib/CodeGen/LiveIntervals.cpp +++ llvm/lib/CodeGen/LiveIntervals.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveVariables.h" @@ -60,9 +59,8 @@ char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; -INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", + false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", @@ -87,8 +85,6 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); AU.addRequiredTransitiveID(MachineDominatorsID); @@ -126,7 +122,6 @@ MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis().getAAResults(); Indexes = &getAnalysis(); DomTree = &getAnalysis(); Index: llvm/lib/CodeGen/LiveRangeEdit.cpp =================================================================== --- llvm/lib/CodeGen/LiveRangeEdit.cpp +++ llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -68,17 +68,16 @@ } bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, - const MachineInstr *DefMI, - AAResults *aa) { + const MachineInstr *DefMI) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; - if (!TII.isTriviallyReMaterializable(*DefMI, aa)) + if (!TII.isTriviallyReMaterializable(*DefMI)) return false; Remattable.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(AAResults *aa) { +void LiveRangeEdit::scanRemattable() { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; @@ -90,14 +89,14 @@ MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; - checkRematerializable(OrigVNI, DefMI, aa); + checkRematerializable(OrigVNI, DefMI); } ScannedRemattable = true; } -bool LiveRangeEdit::anyRematerializable(AAResults *aa) { +bool LiveRangeEdit::anyRematerializable() { if (!ScannedRemattable) - scanRemattable(aa); + scanRemattable(); return !Remattable.empty(); } @@ -274,8 +273,7 @@ } /// Find all live intervals that need to shrink, then remove the instruction. -void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA) { +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { assert(MI->allDefsAreDead() && "Def isn't really dead"); SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); @@ -384,7 +382,7 @@ // register uses. That may provoke RA to split an interval at the KILL // and later result in an invalid live segment end. 
if (isOrigDef && DeadRemats && !HasLiveVRegUses && - TII.isTriviallyReMaterializable(*MI, AA)) { + TII.isTriviallyReMaterializable(*MI)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); @@ -414,14 +412,13 @@ } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled, - AAResults *AA) { + ArrayRef RegsBeingSpilled) { ToShrinkSet ToShrink; for (;;) { // Erase all dead defs. while (!Dead.empty()) - eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA); + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; Index: llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp =================================================================== --- llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -13,10 +13,9 @@ #include "AllocationOrder.h" #include "RegAllocEvictionAdvisor.h" #include "RegAllocGreedy.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/TensorSpec.h" -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" #endif @@ -91,7 +90,6 @@ AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -891,9 +889,7 @@ &getAnalysis())) if (auto *Log = DevModeAnalysis->getLogger(MF)) Log->logFloatFinalReward(static_cast( - calculateRegAllocScore( - MF, getAnalysis(), - getAnalysis().getAAResults()) + calculateRegAllocScore(MF, getAnalysis()) .getScore())); return false; Index: llvm/lib/CodeGen/MachineCSE.cpp =================================================================== --- llvm/lib/CodeGen/MachineCSE.cpp +++ llvm/lib/CodeGen/MachineCSE.cpp @@ -415,7 +415,7 @@ // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) // FIXME: we should be able to hoist loads with no other side effects if // there are no other instructions which can change memory in this loop. // This is a trivial form of alias analysis. Index: llvm/lib/CodeGen/MachineInstr.cpp =================================================================== --- llvm/lib/CodeGen/MachineInstr.cpp +++ llvm/lib/CodeGen/MachineInstr.cpp @@ -1203,7 +1203,7 @@ // destination. The check for isInvariantLoad gives the target the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (mayLoad() && !isDereferenceableInvariantLoad(AA)) + if (mayLoad() && !isDereferenceableInvariantLoad()) // Otherwise, this is a real load. If there is a store between the load and // end of block, we can't move it. return !SawStore; @@ -1348,7 +1348,7 @@ /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad() const { // If the instruction doesn't load at all, it isn't an invariant load. 
if (!mayLoad()) return false; @@ -1374,12 +1374,6 @@ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { if (PSV->isConstant(&MFI)) continue; - } else if (const Value *V = MMO->getValue()) { - // If we have an AliasAnalysis, ask it whether the memory is constant. - if (AA && - AA->pointsToConstantMemory( - MemoryLocation(V, MMO->getSize(), MMO->getAAInfo()))) - continue; } // Otherwise assume conservatively. Index: llvm/lib/CodeGen/MachineLICM.cpp =================================================================== --- llvm/lib/CodeGen/MachineLICM.cpp +++ llvm/lib/CodeGen/MachineLICM.cpp @@ -230,8 +230,7 @@ bool IsGuaranteedToExecute(MachineBasicBlock *BB); - bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const; + bool isTriviallyReMaterializable(const MachineInstr &MI) const; void EnterScope(MachineBasicBlock *MBB); @@ -666,9 +665,9 @@ /// virtual register uses. Even though rematerializable RA might not actually /// rematerialize it in this scenario. In that case we do not want to hoist such /// instruction out of the loop in a belief RA will sink it back if needed. -bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool MachineLICMBase::isTriviallyReMaterializable( + const MachineInstr &MI) const { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) { @@ -1174,7 +1173,7 @@ // Rematerializable instructions should always be hoisted providing the // register allocator can just pull them down again when needed. - if (isTriviallyReMaterializable(MI, AA)) + if (isTriviallyReMaterializable(MI)) return true; // FIXME: If there are long latency loop-invariant instructions inside the @@ -1227,8 +1226,8 @@ // High register pressure situation, only hoist if the instruction is going // to be remat'ed. - if (!isTriviallyReMaterializable(MI, AA) && - !MI.isDereferenceableInvariantLoad(AA)) { + if (!isTriviallyReMaterializable(MI) && + !MI.isDereferenceableInvariantLoad()) { LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1247,7 +1246,7 @@ // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) return nullptr; // Next determine the register class for a temporary register. Index: llvm/lib/CodeGen/MachinePipeliner.cpp =================================================================== --- llvm/lib/CodeGen/MachinePipeliner.cpp +++ llvm/lib/CodeGen/MachinePipeliner.cpp @@ -706,11 +706,11 @@ /// Return true if the instruction causes a chain between memory /// references before and after it. -static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { +static bool isDependenceBarrier(MachineInstr &MI) { return MI.isCall() || MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects() || (MI.hasOrderedMemoryRef() && - (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); + (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad())); } /// Return the underlying objects for the memory references of an instruction. 
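[Note, not part of the patch] From here on, the change at every caller is mechanical: queries that used to thread an AAResults pointer now take none, because any alias-analysis information is already encoded in the memory operand flags set at translation/DAG-build time. A hedged sketch of the resulting call-site shape (the helper name is made up for illustration):

  // Illustrative only: the post-patch API shape. MI carries its own
  // invariance via MachineMemOperand flags, so no AAResults is passed.
  static bool isInvariantOrRemattable(const MachineInstr &MI,
                                      const TargetInstrInfo &TII) {
    if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
      return false; // load from potentially varying memory
    return TII.isTriviallyReMaterializable(MI);
  }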
@@ -743,7 +743,7 @@ UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); - if (isDependenceBarrier(MI, AA)) + if (isDependenceBarrier(MI)) PendingLoads.clear(); else if (MI.mayLoad()) { SmallVector Objs; Index: llvm/lib/CodeGen/RegAllocBasic.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocBasic.cpp +++ llvm/lib/CodeGen/RegAllocBasic.cpp @@ -135,6 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) Index: llvm/lib/CodeGen/RegAllocGreedy.h =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.h +++ llvm/lib/CodeGen/RegAllocGreedy.h @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -174,7 +173,6 @@ EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; - AliasAnalysis *AA; // state std::unique_ptr SpillerInstance; Index: llvm/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -202,8 +202,6 @@ AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -2509,7 +2507,6 @@ Bundles = &getAnalysis(); SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); - AA = &getAnalysis().getAAResults(); initializeCSRCost(); @@ -2531,7 +2528,7 @@ LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. Index: llvm/lib/CodeGen/RegAllocScore.h =================================================================== --- llvm/lib/CodeGen/RegAllocScore.h +++ llvm/lib/CodeGen/RegAllocScore.h @@ -19,7 +19,6 @@ namespace llvm { -class AAResults; class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; @@ -62,8 +61,7 @@ /// different policies, the better policy would have a smaller score. /// The implementation is the overload below (which is also easily unittestable) RegAllocScore calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults); + const MachineBlockFrequencyInfo &MBFI); /// Implementation of the above, which is also more easily unittestable. 
RegAllocScore calculateRegAllocScore( Index: llvm/lib/CodeGen/RegAllocScore.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocScore.cpp +++ llvm/lib/CodeGen/RegAllocScore.cpp @@ -74,8 +74,7 @@ RegAllocScore llvm::calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults) { + const MachineBlockFrequencyInfo &MBFI) { return calculateRegAllocScore( MF, [&](const MachineBasicBlock &MBB) { @@ -83,7 +82,7 @@ }, [&](const MachineInstr &MI) { return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable( - MI, &AAResults); + MI); }); } Index: llvm/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1306,7 +1306,7 @@ } if (!TII->isAsCheapAsAMove(*DefMI)) return false; - if (!TII->isTriviallyReMaterializable(*DefMI, AA)) + if (!TII->isTriviallyReMaterializable(*DefMI)) return false; if (!definesFullReg(*DefMI, SrcReg)) return false; Index: llvm/lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -530,9 +530,9 @@ /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). -static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); + (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad()); } void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, @@ -880,7 +880,7 @@ // actual addresses). // This is a barrier event that acts as a pivotal node in the DAG. - if (isGlobalMemoryObject(AA, &MI)) { + if (isGlobalMemoryObject(&MI)) { // Become the barrier chain. if (BarrierChain) @@ -917,7 +917,7 @@ // If it's not a store or a variant load, we're done. if (!MI.mayStore() && - !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) + !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad())) continue; // Always add dependecy edge to BarrierChain if present. Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -6688,7 +6689,7 @@ bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6751,6 +6752,11 @@ AAMDNodes NewAAInfo = AAInfo; NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + const Value *SrcVal = SrcPtrInfo.V.dyn_cast(); + bool isConstant = + AA && SrcVal && + AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); + MachineMemOperand::Flags MMOFlags = isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector OutLoadChains; @@ -6812,6 +6818,8 @@ MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; + if (isConstant) + SrcMMOFlags |= MachineMemOperand::MOInvariant; Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, @@ -7100,7 +7108,7 @@ bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -7111,7 +7119,7 @@ SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); if (Result.getNode()) return Result; } @@ -7130,9 +7138,9 @@ // use a (potentially long) sequence of loads and stores. if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); - return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo); + return getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -7214,7 +7222,7 @@ bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast(Size); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4083,6 +4083,8 @@ AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); bool isVolatile = I.isVolatile(); + MachineMemOperand::Flags MMOFlags = + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); SDValue Root; bool ConstantMemory = false; @@ -4099,6 +4101,12 @@ // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; + MMOFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4118,9 +4126,6 @@ SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); - MachineMemOperand::Flags MMOFlags - = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -5865,11 +5870,10 @@ // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - /* AlwaysInline */ false, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5887,11 +5891,10 @@ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, - /* AlwaysInline */ true, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5946,7 +5949,7 @@ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } Index: llvm/lib/CodeGen/SplitKit.h =================================================================== --- llvm/lib/CodeGen/SplitKit.h +++ llvm/lib/CodeGen/SplitKit.h @@ -257,7 +257,6 @@ /// class LLVM_LIBRARY_VISIBILITY SplitEditor { SplitAnalysis &SA; - AAResults &AA; LiveIntervals &LIS; VirtRegMap &VRM; MachineRegisterInfo &MRI; @@ -436,9 +435,9 @@ public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS, - VirtRegMap &VRM, MachineDominatorTree &MDT, - MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI); + SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, + MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo &VRAI); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); Index: llvm/lib/CodeGen/SplitKit.cpp =================================================================== --- llvm/lib/CodeGen/SplitKit.cpp +++ llvm/lib/CodeGen/SplitKit.cpp @@ -347,13 +347,11 @@ //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
-SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, - LiveIntervals &LIS, VirtRegMap &VRM, +SplitEditor::SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI) - : SA(SA), AA(AA), LIS(LIS), VRM(VRM), - MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT), - TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), + : SA(SA), LIS(LIS), VRM(VRM), MRI(VRM.getMachineFunction().getRegInfo()), + MDT(MDT), TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()), MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {} @@ -371,9 +369,7 @@ LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(nullptr); + Edit->anyRematerializable(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1454,7 +1450,7 @@ if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, None, &AA); + Edit->eliminateDeadDefs(Dead, None); } void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { Index: llvm/lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- llvm/lib/CodeGen/TargetInstrInfo.cpp +++ llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -916,7 +916,7 @@ } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -952,7 +952,7 @@ return false; // Avoid instructions which load from potentially varying memory. - if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) return false; // If any of the registers accessed are non-constant, conservatively assume Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -142,7 +142,7 @@ // and single use outside the defining block into RematerializableInsts. 
void collectRematerializableInstructions(); - bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA); + bool isTriviallyReMaterializable(const MachineInstr &MI); // TODO: Should also attempt to reduce RP of SGPRs and AGPRs // Attempt to reduce RP of VGPR by sinking trivially rematerializable Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -733,7 +733,7 @@ MachineOperand *Op = MRI.getOneDef(Reg); MachineInstr *Def = Op->getParent(); - if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA)) + if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def)) continue; MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg); @@ -943,9 +943,8 @@ } // Copied from MachineLICM -bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -184,8 +184,7 @@ return ST; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; bool isIgnorableUse(const MachineOperand &MO) const override; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -108,8 +108,8 @@ return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } -bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool SIInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -480,8 +480,7 @@ MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; private: /// Modeling special VFP / NEON fp MLA / MLS hazards. Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6726,8 +6726,8 @@ return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } -bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { // Try hard to rematerialize any VCTPs because if we spill P0, it will block // the tail predication conversion. 
This means that the element count // register has to be live for longer, but that has to be better than Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -495,8 +495,7 @@ unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1086,8 +1086,8 @@ // For opcodes with the ReMaterializable flag set, this function is called to // verify the instruction is really rematable. -bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { +bool PPCInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,8 +43,7 @@ const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -40,7 +40,7 @@ RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: Index: llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -49,7 +49,6 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -164,15 +163,15 @@ // Determine whether MI reads memory, writes memory, has side effects, // and/or uses the stack pointer value. -static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, - bool &Write, bool &Effects, bool &StackPointer) { +static void query(const MachineInstr &MI, bool &Read, bool &Write, + bool &Effects, bool &StackPointer) { assert(!MI.isTerminator()); if (MI.isDebugInstr() || MI.isPosition()) return; // Check for loads. 
- if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) Read = true; // Check for stores. @@ -255,9 +254,9 @@ } // Test whether Def is safe and profitable to rematerialize. -static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA, +static bool shouldRematerialize(const MachineInstr &Def, const WebAssemblyInstrInfo *TII) { - return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA); + return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def); } // Identify the definition for this register at this point. This is a @@ -311,7 +310,7 @@ // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, - const MachineInstr *Insert, AliasAnalysis &AA, + const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI) { const MachineInstr *DefI = Def->getParent(); @@ -391,7 +390,7 @@ } bool Read = false, Write = false, Effects = false, StackPointer = false; - query(*DefI, AA, Read, Write, Effects, StackPointer); + query(*DefI, Read, Write, Effects, StackPointer); // If the instruction does not access memory and has no side effects, it has // no additional dependencies. @@ -406,7 +405,7 @@ bool InterveningWrite = false; bool InterveningEffects = false; bool InterveningStackPointer = false; - query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects, + query(*I, InterveningRead, InterveningWrite, InterveningEffects, InterveningStackPointer); if (Effects && InterveningEffects) return false; @@ -808,7 +807,6 @@ WebAssemblyFunctionInfo &MFI = *MF.getInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); const auto *TRI = MF.getSubtarget().getRegisterInfo(); - AliasAnalysis &AA = getAnalysis().getAAResults(); auto &MDT = getAnalysis(); auto &LIS = getAnalysis(); @@ -872,8 +870,7 @@ // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. bool SameBlock = DefI->getParent() == &MBB; - bool CanMove = SameBlock && - isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) && + bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) && !TreeWalker.isOnStack(Reg); if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) { Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); @@ -883,7 +880,7 @@ // TODO: Encode this properly as a stackified value. 
if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) MFI.clearFrameBaseVreg(); - } else if (shouldRematerialize(*DefI, AA, TII)) { + } else if (shouldRematerialize(*DefI, TII)) { Insert = rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); Index: llvm/lib/Target/X86/X86InstrInfo.h =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.h +++ llvm/lib/Target/X86/X86InstrInfo.h @@ -240,8 +240,7 @@ unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, Index: llvm/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.cpp +++ llvm/lib/Target/X86/X86InstrInfo.cpp @@ -742,8 +742,8 @@ return isPICBase; } -bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool X86InstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable @@ -869,7 +869,7 @@ MI.getOperand(1 + X86::AddrScaleAmt).isImm() && MI.getOperand(1 + X86::AddrIndexReg).isReg() && MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && - MI.isDereferenceableInvariantLoad(AA)) { + MI.isDereferenceableInvariantLoad()) { Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; Index: llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -52,6 +52,10 @@ ; RUN: -debug-pass=Structure %s -o /dev/null 2>&1 -verify-machineinstrs=0 \ ; RUN: | FileCheck %s --check-prefix DISABLED +; ENABLED: Safe Stack instrumentation pass + +; ENABLED-O1: Basic Alias Analysis (stateless AA impl) +; ENABLED-O1-NEXT: Function Alias Analysis Results ; ENABLED: IRTranslator ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits @@ -60,8 +64,6 @@ ; ENABLED-O1-NEXT: PreLegalizerCombiner ; VERIFY-O0-NEXT: AArch64O0PreLegalizerCombiner ; VERIFY-NEXT: Verify generated machine code -; ENABLED-O1-NEXT: Basic Alias Analysis (stateless AA impl) -; ENABLED-O1-NEXT: Function Alias Analysis Results ; ENABLED-O1-NEXT: LoadStoreOpt ; VERIFY-O0-NEXT: Analysis containing CSE Info ; ENABLED-NEXT: Legalizer Index: llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -28,9 +28,9 @@ entry: ; CHECK-LABEL: t1: ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] +; CHECK: str [[DEST:q[0-9]+]], [x0] ; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15] ; CHECK: stur [[DEST:q[0-9]+]], [x0, #15] -; CHECK: str [[DEST:q[0-9]+]], [x0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -51,9 +51,9 @@ entry: ; 
CHECK-LABEL: t3: ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] +; CHECK: str [[DEST]], [x0] ; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16] ; CHECK: str [[REG4]], [x0, #16] -; CHECK: str [[DEST]], [x0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -439,7 +439,7 @@ ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -460,7 +460,7 @@ ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -489,7 +489,7 @@ ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), 
[[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -549,7 +549,7 @@ ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -568,7 +568,7 @@ ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -589,7 +589,7 @@ ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -612,7 +612,7 @@ ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x 
s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -641,7 +641,7 @@ ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -750,7 +750,7 @@ ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -769,7 +769,7 @@ ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -786,7 +786,7 @@ ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -807,7 +807,7 @@ ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -905,7 +905,7 @@ ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -977,7 +977,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; 
CHECK-NEXT: SI_RETURN @@ -1016,7 +1016,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -1038,7 +1038,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile dereferenceable invariant load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg0, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile dereferenceable invariant load (s32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 ret void @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg1, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile dereferenceable invariant load (s32) from %ir.arg1, addrspace 4) ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -64,7 +64,7 @@ ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, 
addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -90,7 +90,7 @@ ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -3202,7 +3202,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 @@ -3338,7 +3338,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 @@ -3412,7 +3412,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 
1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 @@ -3507,7 +3507,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc @@ -3607,7 +3607,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) @@ -3718,7 +3718,7 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) @@ -3821,7 +3821,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -3873,7 +3873,7 @@ ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -3899,7 +3899,7 @@ ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -4111,7 +4111,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 @@ -4175,7 +4175,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 @@ -4242,7 +4242,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 @@ -4312,7 +4312,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 @@ -4394,7 +4394,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (dereferenceable invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -simplify-mir -global-isel -march=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +; Check the flags set on the memory operands for loads determined to +; be constants by alias analysis. 
+ +@const_gv0 = external addrspace(1) constant i32, align 4 +@const_gv1 = external addrspace(1) constant i32, align 4 +@const_struct_gv = external addrspace(1) constant { i32, i64 }, align 8 + +define i32 @load_const_i32_gv() { + ; CHECK-LABEL: name: load_const_i32_gv + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_gv0, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %load = load i32, ptr addrspace(1) @const_gv0, align 4 + ret i32 %load +} + +define i32 @load_select_const_i32_gv(i1 %cond) { + ; CHECK-LABEL: name: load_select_const_i32_gv + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 + ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv1 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[TRUNC]](s1), [[GV]], [[GV1]] + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[SELECT]](p1) :: (dereferenceable invariant load (s32) from %ir.select, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %select = select i1 %cond, ptr addrspace(1) @const_gv0, ptr addrspace(1) @const_gv1 + %load = load i32, ptr addrspace(1) %select, align 4 + ret i32 %load +} + +define { i32, i64 } @load_const_struct_gv() { + ; CHECK-LABEL: name: load_const_struct_gv + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_struct_gv + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_struct_gv, align 8, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[GV]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (dereferenceable invariant load (s64) from @const_struct_gv + 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %load = load { i32, i64 }, ptr addrspace(1) @const_struct_gv, align 8 + ret { i32, i64 } %load +} + +define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { + ; CHECK-LABEL: name: test_memcpy_p1_constaddr_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: SI_RETURN + call void @llvm.memcpy.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, 
i64 32, i1 false) + ret void +} + +define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { + ; CHECK-LABEL: name: test_memcpy_inline_p1_constaddr_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: SI_RETURN + call void @llvm.memcpy.inline.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + ret void +} + +define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { + ; CHECK-LABEL: name: test_memmove_p1_constaddr_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) + ; CHECK-NEXT: SI_RETURN + call void @llvm.memmove.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + ret void +} + +declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memmove.p1.p4.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0 + +attributes #0 = { argmemonly nofree nounwind willreturn } Index: llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -4,7 +4,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s ; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer -; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4) +; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (dereferenceable invariant load (s128) from %ir.13, addrspace 4) define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 { .entry: Index: 
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -102,8 +102,6 @@ ; GCN-O0-NEXT: Eliminate PHI nodes for register allocation ; GCN-O0-NEXT: SI Lower control flow pseudo instructions ; GCN-O0-NEXT: Two-Address instruction pass -; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O0-NEXT: Function Alias Analysis Results ; GCN-O0-NEXT: MachineDominator Tree Construction ; GCN-O0-NEXT: Slot index numbering ; GCN-O0-NEXT: Live Interval Analysis Index: llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -30,7 +30,7 @@ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8 - ; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (load (s64) from %ir.40, addrspace 4) + ; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (dereferenceable invariant load (s64) from %ir.40, addrspace 4) ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc @@ -40,8 +40,8 @@ ; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (load (s128) from %ir.84, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (load (s128) from `<4 x i32> addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (dereferenceable invariant load (s128) from %ir.84, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (dereferenceable invariant load (s128) from `<4 x i32> addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: KILL undef %74:sreg_64 ; CHECK-NEXT: KILL undef %132:sgpr_128 @@ -60,7 +60,7 @@ ; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (load (s128) from %ir.91, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (dereferenceable invariant load (s128) from %ir.91, addrspace 4) ; CHECK-NEXT: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc ; 
CHECK-NEXT: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc @@ -104,11 +104,11 @@ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (load (s128) from %ir.97, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (load (s128) from %ir.103, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (load (s128) from %ir.111, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (load (s128) from %ir.117, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (load (s128) from %ir.123, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (dereferenceable invariant load (s128) from %ir.97, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (dereferenceable invariant load (s128) from %ir.103, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (dereferenceable invariant load (s128) from %ir.111, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (dereferenceable invariant load (s128) from %ir.117, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (dereferenceable invariant load (s128) from %ir.123, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32)) @@ -121,7 +121,7 @@ ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (load (s128) from %ir.131, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (dereferenceable invariant load (s128) from %ir.131, addrspace 4) ; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, 
[[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc @@ -130,11 +130,11 @@ ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (load (s128) from %ir.155, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (load (s128) from %ir.138, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (dereferenceable invariant load (s128) from %ir.155, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (dereferenceable invariant load (s128) from %ir.138, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (load (s128) from %ir.144, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (dereferenceable invariant load (s128) from %ir.144, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (dereferenceable invariant load (s128) from %ir.150, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) @@ -157,29 +157,29 @@ ; CHECK-NEXT: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (load (s128) from %ir.162, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM 
%241, 0, 0 :: (dereferenceable invariant load (s128) from %ir.162, addrspace 4) ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc ; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc ; CHECK-NEXT: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (load (s32) from %ir..i085.i, align 8, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (load (s128) from %ir.170, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (dereferenceable invariant load (s32) from %ir..i085.i, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (dereferenceable invariant load (s128) from %ir.170, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (load (s128) from %ir.176, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (dereferenceable invariant load (s128) from %ir.176, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (load (s128) from %ir.185, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (dereferenceable invariant load (s128) from %ir.185, addrspace 4) ; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1 ; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit 
$exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (load (s128) from %ir.194, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (load (s128) from %ir.200, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (dereferenceable invariant load (s128) from %ir.194, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (dereferenceable invariant load (s128) from %ir.200, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) @@ -187,18 +187,18 @@ ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc ; CHECK-NEXT: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc ; CHECK-NEXT: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (load (s64) from %ir.308, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (dereferenceable invariant load (s64) from %ir.308, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (load (s128) from %ir.223, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (load (s128) from %ir.230, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (dereferenceable invariant load (s128) from %ir.223, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (dereferenceable invariant load (s128) from %ir.230, addrspace 4) ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (load (s128) from %ir.236, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (dereferenceable invariant load (s128) from %ir.236, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]] ; CHECK-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32))
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (load (s128) from %ir.242, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (dereferenceable invariant load (s128) from %ir.242, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
@@ -208,24 +208,24 @@
   ; CHECK-NEXT: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
   ; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
-  ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (load (s64) from %ir.320, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (dereferenceable invariant load (s64) from %ir.320, addrspace 4)
   ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71
   ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
   ; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
   ; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
   ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (load (s128) from %ir.282, addrspace 4)
-  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (dereferenceable invariant load (s128) from %ir.282, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
   ; CHECK-NEXT: KILL %411.sub0, %411.sub1
   ; CHECK-NEXT: KILL undef %488:sreg_64
   ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3
   ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (load (s128) from %ir.291, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (dereferenceable invariant load (s128) from %ir.291, addrspace 4)
   ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
   ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
   ; CHECK-NEXT: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
   ; CHECK-NEXT: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
-  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (load (s32) from %ir..i0100.i, align 8, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (dereferenceable invariant load (s32) from %ir..i0100.i, align 8, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
@@ -245,13 +245,13 @@
   ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
   ; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
   ; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (load (s128) from %ir.351, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (dereferenceable invariant load (s128) from %ir.351, addrspace 4)
   ; CHECK-NEXT: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
   ; CHECK-NEXT: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (load (s128) from %ir.357, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (dereferenceable invariant load (s128) from %ir.357, addrspace 4)
   ; CHECK-NEXT: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
   ; CHECK-NEXT: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
-  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (load (s128) from %ir.363, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (dereferenceable invariant load (s128) from %ir.363, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
   ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
@@ -370,7 +370,7 @@
   ; CHECK-NEXT: [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec
   ; CHECK-NEXT: [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
   ; CHECK-NEXT: [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec
-  ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (load (s256) from `<8 x i32> addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (dereferenceable invariant load (s256) from `<8 x i32> addrspace(4)* undef`, addrspace 4)
   ; CHECK-NEXT: [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec
   ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
   ; CHECK-NEXT: [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec
Index: llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
+++ llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
@@ -11,8 +11,8 @@
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed $sgpr1
   ; CHECK-NEXT: undef %0.sub0:sreg_64 = COPY killed [[COPY]]
   ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]]
-  ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
-  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
+  ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (dereferenceable invariant load (s32) from %ir.ptr + 8, addrspace 4)
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
   ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]]
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1
Index: llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -579,18 +579,18 @@
   ; SI-NEXT: %69:vgpr_32, dead %71:sreg_32_xm0_xexec = V_ADDC_U32_e64 killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_LSHLREV_B64_e64_]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
   ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %69, %subreg.sub1
   ; SI-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s64) from %ir.idx, addrspace 1)
-  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 16, 0, implicit $exec :: (load (s128) from %ir.6 + 16, addrspace 4)
+  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 16, 0, implicit $exec :: (dereferenceable invariant load (s128) from %ir.6 + 16, addrspace 4)
   ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub3
   ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub2
   ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub1
   ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub0
-  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 0, 0, implicit $exec :: (load (s128) from %ir.6, align 32, addrspace 4)
+  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from %ir.6, align 32, addrspace 4)
   ; SI-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub3
   ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub2
   ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub1
   ; SI-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_1]].sub0
   ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3, killed [[COPY5]], %subreg.sub4, killed [[COPY4]], %subreg.sub5, killed [[COPY3]], %subreg.sub6, killed [[COPY2]], %subreg.sub7
-  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_2:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 killed [[GLOBAL_LOAD_DWORDX2_]], 48, 0, implicit $exec :: (load (s128) from %ir.8, addrspace 4)
+  ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_2:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 killed [[GLOBAL_LOAD_DWORDX2_]], 48, 0, implicit $exec :: (dereferenceable invariant load (s128) from %ir.8, addrspace 4)
   ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
   ; SI-NEXT: {{ $}}
   ; SI-NEXT: bb.2:
Index: llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
===================================================================
--- llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
+++ llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
@@ -88,7 +88,7 @@
   ; CHECK-NEXT: renamable $eax = MOV32r0 implicit-def dead $eflags
   ; CHECK-NEXT: renamable $rcx = MOV64ri32 -4096
   ; CHECK-NEXT: [[MOV64ri32_:%[0-9]+]]:gr64 = MOV64ri32 -4096
-  ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, @y, $noreg :: (dereferenceable load (s64) from @y, !tbaa !3)
+  ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, @y, $noreg :: (dereferenceable invariant load (s64) from @y, !tbaa !3)
   ; CHECK-NEXT: JMP_1 %bb.2
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.1 (%ir-block.4):
@@ -115,7 +115,7 @@
     successors: %bb.1(0x04000000), %bb.2(0x7c000000)
     liveins: $eax, $rcx
    %2:gr64 = MOV64ri32 -4096
-   CMP64mi32 $rip, 1, $noreg, @y, $noreg, @x, implicit-def $eflags :: (dereferenceable load (s64) from @y, !tbaa !3)
+   CMP64mi32 $rip, 1, $noreg, @y, $noreg, @x, implicit-def $eflags :: (dereferenceable invariant load (s64) from @y, !tbaa !3)
    renamable $al = SETCCr 4, implicit killed $eflags, implicit killed $eax, implicit-def $eax
    MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store (s32) into %ir.scevgep, !tbaa !7)
    renamable $rcx = ADD64ri8 killed renamable $rcx, 4, implicit-def $eflags