Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -284,6 +284,18 @@ return getPointerTy(DL); } + /// This callback is used to inspect load/store instructions and add + /// target-specific MachineMemOperand flags to them. The default + /// implementation does nothing. + virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { + return MachineMemOperand::MONone; + } + + MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI, + const DataLayout &DL) const; + MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, + const DataLayout &DL) const; + virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { return true; } @@ -3763,13 +3775,6 @@ return Chain; } - /// This callback is used to inspect load/store instructions and add - /// target-specific MachineMemOperand flags to them. The default - /// implementation does nothing. - virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { - return MachineMemOperand::MONone; - } - /// Should SelectionDAG lower an atomic store of the given kind as a normal /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to /// eventually migrate all targets to the using StoreSDNodes, but porting is Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -859,11 +860,6 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); - - auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOLoad; - if (DL->getTypeStoreSize(LI.getType()) == 0) return true; @@ -882,6 +878,9 @@ return true; } + auto &TLI = *MF->getSubtarget().getTargetLowering(); + MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + const MDNode *Ranges = Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { @@ -904,10 +903,6 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { const StoreInst &SI = cast<StoreInst>(U); - auto Flags = SI.isVolatile() ? 
MachineMemOperand::MOVolatile - : MachineMemOperand::MONone; - Flags |= MachineMemOperand::MOStore; - if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) return true; @@ -927,6 +922,9 @@ return true; } + auto &TLI = *MF->getSubtarget().getTargetLowering(); + MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL); + for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4039,12 +4039,6 @@ SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); - bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); - bool isDereferenceable = - isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -4058,6 +4052,8 @@ if (NumValues == 0) return; + bool isVolatile = I.isVolatile(); + SDValue Root; bool ConstantMemory = false; if (isVolatile || NumValues > MaxParallelChains) @@ -4089,6 +4085,10 @@ SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); + + MachineMemOperand::Flags MMOFlags + = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -4108,16 +4108,6 @@ PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), Flags); - auto MMOFlags = MachineMemOperand::MONone; - if (isVolatile) - MMOFlags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - MMOFlags |= MachineMemOperand::MONonTemporal; - if (isInvariant) - MMOFlags |= MachineMemOperand::MOInvariant; - if (isDereferenceable) - MMOFlags |= MachineMemOperand::MODereferenceable; - MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -4244,12 +4234,7 @@ AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - auto MMOFlags = MachineMemOperand::MONone; - if (I.isVolatile()) - MMOFlags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_nontemporal)) - MMOFlags |= MachineMemOperand::MONonTemporal; - MMOFlags |= TLI.getMMOFlags(I); + auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
@@ -4618,7 +4603,7 @@ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + Flags |= DAG.getTargetLoweringInfo().getTargetMMOFlags(I); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = @@ -4669,7 +4654,7 @@ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + Flags |= DAG.getTargetLoweringInfo().getTargetMMOFlags(I); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = @@ -4715,16 +4700,7 @@ I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); - auto Flags = MachineMemOperand::MOLoad; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_invariant_load)) - Flags |= MachineMemOperand::MOInvariant; - if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), - DAG.getDataLayout())) - Flags |= MachineMemOperand::MODereferenceable; - - Flags |= TLI.getMMOFlags(I); + auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -4780,10 +4756,7 @@ if (I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - auto Flags = MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= TLI.getMMOFlags(I); + auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -2005,3 +2006,39 @@ void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { MF.getRegInfo().freezeReservedRegs(MF); } + +MachineMemOperand::Flags +TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI, + const DataLayout &DL) const { + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; + if (LI.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + + if (LI.hasMetadata(LLVMContext::MD_nontemporal)) + Flags |= MachineMemOperand::MONonTemporal; + + if (LI.hasMetadata(LLVMContext::MD_invariant_load)) + Flags |= MachineMemOperand::MOInvariant; + + if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL)) + Flags |= MachineMemOperand::MODereferenceable; + + Flags |= getTargetMMOFlags(LI); + return Flags; +} + +MachineMemOperand::Flags +TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, + const DataLayout &DL) const { + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + + if (SI.isVolatile()) + Flags |= MachineMemOperand::MOVolatile; + + if (SI.hasMetadata(LLVMContext::MD_nontemporal)) + Flags |= MachineMemOperand::MONonTemporal; + + // FIXME: Not preserving dereferenceable + Flags |= getTargetMMOFlags(SI); + return Flags; +} Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- 
llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -610,7 +610,8 @@ unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; - MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + MachineMemOperand::Flags getTargetMMOFlags( + const Instruction &I) const override; bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8932,7 +8932,7 @@ } MachineMemOperand::Flags -AArch64TargetLowering::getMMOFlags(const Instruction &I) const { +AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const { if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr) return MOStridedAccess; Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -677,7 +677,8 @@ MachineBasicBlock *MBB, unsigned Opcode) const; - MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; }; Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -3995,7 +3995,7 @@ } MachineMemOperand::Flags -SystemZTargetLowering::getMMOFlags(const Instruction &I) const { +SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { // Because of how we convert atomic_load and atomic_store to normal loads and // stores in the DAG, we need to ensure that the MMOs are marked volatile // since DAGCombine hasn't been updated to account for atomic, but non Index: llvm/lib/Target/XCore/XCoreISelLowering.h =================================================================== --- llvm/lib/Target/XCore/XCoreISelLowering.h +++ llvm/lib/Target/XCore/XCoreISelLowering.h @@ -188,7 +188,8 @@ SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; - MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + MachineMemOperand::Flags getTargetMMOFlags( + const Instruction &I) const override; // Inline asm support std::pair<unsigned, const TargetRegisterClass *> Index: llvm/lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -996,7 +996,7 @@ } MachineMemOperand::Flags -XCoreTargetLowering::getMMOFlags(const Instruction &I) const { +XCoreTargetLowering::getTargetMMOFlags(const Instruction &I) const { // Because of how we convert atomic_load and atomic_store to normal loads and // stores in the DAG, we need to ensure that the MMOs are marked volatile // since DAGCombine hasn't been updated to account for atomic, but non Index: llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll +++ 
llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll @@ -18,7 +18,7 @@ ; CHECK: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2, debug-location !DILocation(line: 0, scope: !22) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.retval) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (dereferenceable load 4 from @var1) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]], debug-location !19 ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2, debug-location !20 ; CHECK: G_BR %bb.3, debug-location !20 Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-- -mcpu=falkor -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define i32 @load_invariant(i32* %ptr) { + ; CHECK-LABEL: name: load_invariant + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load i32, i32* %ptr, align 4, !invariant.load !0 + ret i32 %load +} + +define i32 @load_volatile_invariant(i32* %ptr) { + ; CHECK-LABEL: name: load_volatile_invariant + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load volatile i32, i32* %ptr, align 4, !invariant.load !0 + ret i32 %load +} + +define i32 @load_dereferenceable(i32* dereferenceable(4) %ptr) { + ; CHECK-LABEL: name: load_dereferenceable + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load i32, i32* %ptr, align 4 + ret i32 %load +} + +define i32 @load_dereferenceable_invariant(i32* dereferenceable(4) %ptr) { + ; CHECK-LABEL: name: load_dereferenceable_invariant + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load i32, i32* %ptr, align 4, !invariant.load !0 + ret i32 %load +} + +define i32 @load_nontemporal(i32* %ptr) { + ; CHECK-LABEL: name: load_nontemporal + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load i32, i32* %ptr, align 4, !nontemporal !0 + ret i32 %load +} + +define i32 @load_falkor_strided_access(i32* %ptr) { + ; CHECK-LABEL: name: 
load_falkor_strided_access + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load 4 from %ir.ptr) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %load = load i32, i32* %ptr, align 4, !falkor.strided.access !0 + ret i32 %load +} + +!0 = !{} Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll @@ -32,7 +32,7 @@ ; CHECK-MIR-NEXT: G_STORE %2(p0), %0(p0) :: (volatile store 8 into %stack.0.StackGuardSlot) ; CHECK-MIR-NEXT: %3:_(p0) = G_FRAME_INDEX %stack.1.buf ; CHECK-MIR-NEXT: %4:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK-MIR-NEXT: %5:_(p0) = G_LOAD %0(p0) :: (volatile load 8 from %ir.StackGuardSlot) +; CHECK-MIR-NEXT: %5:_(p0) = G_LOAD %0(p0) :: (volatile dereferenceable load 8 from %ir.StackGuardSlot) ; CHECK-MIR-NEXT: %6:_(s1) = G_ICMP intpred(eq), %4(p0), %5 ; CHECK-MIR-NEXT: G_BRCOND %6(s1), %bb.2 ; CHECK-MIR-NEXT: G_BR %bb.3 Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-- -mcpu=falkor -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define void @store_nontemporal(i32* dereferenceable(4) %ptr) { + ; CHECK-LABEL: name: store_nontemporal + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (non-temporal store 4 into %ir.ptr) + ; CHECK: RET_ReallyLR + store i32 0, i32* %ptr, align 4, !nontemporal !0 + ret void +} + +define void @store_dereferenceable(i32* dereferenceable(4) %ptr) { + ; CHECK-LABEL: name: store_dereferenceable + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store 4 into %ir.ptr) + ; CHECK: RET_ReallyLR + store i32 0, i32* %ptr, align 4 + ret void +} + +define void @store_volatile_dereferenceable(i32* dereferenceable(4) %ptr) { + ; CHECK-LABEL: name: store_volatile_dereferenceable + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store 4 into %ir.ptr) + ; CHECK: RET_ReallyLR + store volatile i32 0, i32* %ptr, align 4 + ret void +} + +define void @store_falkor_strided_access(i32* %ptr) { + ; CHECK-LABEL: name: store_falkor_strided_access + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store 4 into %ir.ptr) + ; CHECK: RET_ReallyLR + store i32 0, i32* %ptr, align 4, !falkor.strided.access !0 + ret void +} + +!0 = !{} Index: 
llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -25,7 +25,7 @@ ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @var1) ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1364,10 +1364,10 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from %ir.arg0 + 4, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5) ; CHECK: G_STORE [[LOAD1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) @@ -1391,13 +1391,13 @@ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load 4 from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile load 1 from %ir.arg1, align 4, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD1]], [[C]](s32) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load 4 from %ir.arg1 + 4, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5) ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } 
addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) @@ -1427,8 +1427,8 @@ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (load 4 from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (load 8 from %ir.arg1, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5) ; CHECK: G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[LOAD3]](s64), [[DEF1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] Index: llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll +++ llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll @@ -29,13 +29,13 @@ ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store 4 into %ir.fmt.addr) ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store 4 into %ir.ap1, align 1) ; MIPS32: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.va_copy), [[FRAME_INDEX5]](p0), [[FRAME_INDEX4]](p0) - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load 4 from %ir.aq) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (dereferenceable load 4 from %ir.aq) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s32) - ; MIPS32: G_STORE [[GEP]](p0), [[FRAME_INDEX5]](p0) :: (store 4 into %ir.aq) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s32) + ; MIPS32: G_STORE [[PTR_ADD]](p0), [[FRAME_INDEX5]](p0) :: (store 4 into %ir.aq) ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.2) ; MIPS32: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store 4 into %ir.s) - ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (load 4 from %ir.s) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (dereferenceable load 4 from %ir.s) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY4]](p0) ; MIPS32: $a1 = COPY [[LOAD2]](p0) Index: llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll +++ llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll @@ -20,7 +20,7 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f ; ALL: G_STORE [[TRUNC]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %ir.coerce.dive13) + ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13) ; ALL: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32) ; ALL: $xmm0 = COPY [[ANYEXT]](s128) ; ALL: RET 0, 
implicit $xmm0 @@ -50,7 +50,7 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d ; ALL: G_STORE [[TRUNC]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.coerce.dive2) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 8), (load 1 from %ir.1, align 8) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.coerce.dive13) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.coerce.dive13) ; ALL: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s64) ; ALL: $xmm0 = COPY [[ANYEXT]](s128) ; ALL: RET 0, implicit $xmm0 @@ -80,12 +80,12 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d ; ALL: G_STORE [[TRUNC]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL: G_STORE [[TRUNC1]](s64), [[GEP]](p0) :: (store 8 into %ir.2) + ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL: G_STORE [[TRUNC1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 8), (load 1 from %ir.4, align 8) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.5) - ; ALL: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.5 + 8) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5) + ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8) ; ALL: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s64) ; ALL: $xmm0 = COPY [[ANYEXT]](s128) ; ALL: [[ANYEXT1:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD1]](s64) @@ -117,7 +117,7 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %ir.coerce.dive13) + ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13) ; ALL: $eax = COPY [[LOAD]](s32) ; ALL: RET 0, implicit $eax entry: @@ -143,7 +143,7 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.0, align 4) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.1, align 4), (load 1 from %ir.2, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.3, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.3, align 4) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: RET 0, implicit $rax entry: @@ -172,14 +172,14 @@ ; ALL: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store 8 into %ir.0, align 4) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL: 
[[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) - ; ALL: G_STORE [[COPY1]](s32), [[GEP]](p0) :: (store 4 into %ir.1) + ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) + ; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into %ir.1) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store 1 into %ir.2, align 4), (load 1 from %ir.3, align 4) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.4, align 4), (load 1 from %ir.5, align 4) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store 1 into %ir.6, align 8), (load 1 from %ir.7, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 8 from %ir.tmp) - ; ALL: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.tmp + 8, align 8) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load 8 from %ir.tmp) + ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) + ; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 4 from %ir.tmp + 8, align 8) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: $edx = COPY [[LOAD1]](s32) ; ALL: RET 0, implicit $rax, implicit $edx @@ -216,12 +216,12 @@ ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1, align 4) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL: G_STORE [[COPY1]](s64), [[GEP]](p0) :: (store 8 into %ir.2, align 4) + ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2, align 4) ; ALL: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 4), (load 1 from %ir.4, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.5, align 4) - ; ALL: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.5 + 8, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5, align 4) + ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8, align 4) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: $rdx = COPY [[LOAD1]](s64) ; ALL: RET 0, implicit $rax, implicit $rdx
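
Note (not part of the patch): the sketch below only illustrates how a target would hook the renamed callback after this change. MyTargetLowering, MOStreamingAccess, and the metadata kind "mytarget.streaming.access" are hypothetical stand-ins, modeled on the AArch64 Falkor handling above. Because the new TargetLoweringBase::getLoadMemOperandFlags()/getStoreMemOperandFlags() helpers call getTargetMMOFlags(), a flag returned here is attached to MMOs built by both SelectionDAGBuilder and the GlobalISel IRTranslator without further target changes.

// Hypothetical out-of-tree example of overriding getTargetMMOFlags().
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

namespace {

// Target-private flag taken from the MOTargetFlag* range, the same way
// AArch64 defines MOStridedAccess.
const MachineMemOperand::Flags MOStreamingAccess =
    MachineMemOperand::MOTargetFlag1;

class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  // Called from getLoadMemOperandFlags()/getStoreMemOperandFlags(), so the
  // flag is picked up on both the SelectionDAG and GlobalISel paths.
  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override {
    // "mytarget.streaming.access" is a made-up metadata kind used only for
    // this illustration.
    if (I.getMetadata("mytarget.streaming.access") != nullptr)
      return MOStreamingAccess;
    return MachineMemOperand::MONone;
  }
};

} // end anonymous namespace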