Index: include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- include/llvm/CodeGen/MachineMemOperand.h +++ include/llvm/CodeGen/MachineMemOperand.h @@ -45,18 +45,28 @@ /// Offset - This is an offset from the base Value*. int64_t Offset; + /// Address space of the base value. + unsigned AddrSpace; + explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0) - : V(v), Offset(offset) {} + : V(v), Offset(offset) { + AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0; + } - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0) - : V(v), Offset(offset) {} + /// For stack and fixed stack, \p AS specifies the address space. + /// For other kinds, \p AS is ignored. + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, + unsigned AS = 0) + : V(v), Offset(offset) { + AddrSpace = v && v->hasAllocaAddrSpace() ? AS : 0; + } MachinePointerInfo getWithOffset(int64_t O) const { if (V.isNull()) return MachinePointerInfo(); if (V.is<const Value*>()) return MachinePointerInfo(V.get<const Value*>(), Offset+O); - return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O); + return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset + O, + AddrSpace); } /// Return true if memory region [V, V+Offset+Size) is known to be @@ -65,7 +75,7 @@ const DataLayout &DL) const; /// Return the LLVM IR address space number that this pointer points into. - unsigned getAddrSpace() const; + unsigned getAddrSpace() const { return AddrSpace; } /// Return a MachinePointerInfo record that refers to the constant pool. 
static MachinePointerInfo getConstantPool(MachineFunction &MF); Index: include/llvm/CodeGen/PseudoSourceValue.h =================================================================== --- include/llvm/CodeGen/PseudoSourceValue.h +++ include/llvm/CodeGen/PseudoSourceValue.h @@ -64,6 +64,9 @@ PSVKind kind() const { return Kind; } + bool hasAllocaAddrSpace() const { + return Kind == Stack || Kind == FixedStack; + } bool isStack() const { return Kind == Stack; } bool isGOT() const { return Kind == GOT; } bool isConstantPool() const { return Kind == ConstantPool; } Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -560,13 +560,6 @@ // MachineMemOperand Implementation //===----------------------------------------------------------------------===// -/// getAddrSpace - Return the LLVM IR address space number that this pointer -/// points into. -unsigned MachinePointerInfo::getAddrSpace() const { - if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; - return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); -} - /// isDereferenceable - Return true if V is always dereferenceable for /// Offset + Size byte. bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, @@ -594,7 +587,8 @@ /// the specified FrameIndex. 
MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, int FI, int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset, + MF.getDataLayout().getAllocaAddrSpace()); } MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { @@ -607,7 +601,8 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, + MF.getDataLayout().getAllocaAddrSpace()); } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3474,6 +3474,7 @@ EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3524,7 +3525,7 @@ LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo())); + MachinePointerInfo::getStack(MF, Offset))); // Increment the pointers. Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); @@ -3544,14 +3545,15 @@ // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT)); + MachinePointerInfo::getStack(MF, Offset), + MemVT)); // The order of the stores doesn't matter - say it with a TokenFactor. 
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT); + MachinePointerInfo::getStack(MF, 0), LoadedVT); // Callers expect a MERGE_VALUES node. return std::make_pair(Load, TF); Index: test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" + +define amdgpu_kernel void @test_fn(<16 x double> addrspace(1)* %results, i32 %i) { +entry: + %sPrivateStorage = alloca [2 x <16 x double>], align 128, addrspace(5) + %ptr = getelementptr inbounds [2 x <16 x double>], [2 x <16 x double>] addrspace(5)* %sPrivateStorage, i32 0, i32 0, i32 %i + %a = bitcast double addrspace(5)* %ptr to <16 x double> addrspace(5)* + ; CHECK: buffer_load_dword + ; CHECK-NOT: flat_load_dword + %r6 = load <16 x double>, <16 x double> addrspace(5)* %a, align 8 + store <16 x double> %r6, <16 x double> addrspace(1)* %results, align 128 + ret void +} +