Index: include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- include/llvm/CodeGen/MachineMemOperand.h +++ include/llvm/CodeGen/MachineMemOperand.h @@ -46,22 +46,29 @@ int64_t Offset; uint8_t StackID; + /// Address space of the base value. + unsigned AddrSpace; explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0, uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0; + } - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0, - uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + /// For stack and fixed stack, \p AS specifies the address space. + /// For other kinds, \p AS is ignored. + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, + uint8_t ID = 0, unsigned AS = 0) + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v && v->hasAllocaAddrSpace() ? AS : 0; + } MachinePointerInfo getWithOffset(int64_t O) const { if (V.isNull()) return MachinePointerInfo(); if (V.is<const Value*>()) - return MachinePointerInfo(V.get<const Value*>(), Offset+O, StackID); - return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O, - StackID); + return MachinePointerInfo(V.get<const Value *>(), Offset + O, StackID); + return MachinePointerInfo(V.get<const PseudoSourceValue *>(), Offset + O, + StackID, AddrSpace); } /// Return true if memory region [V, V+Offset+Size) is known to be @@ -70,7 +77,7 @@ const DataLayout &DL) const; /// Return the LLVM IR address space number that this pointer points into. - unsigned getAddrSpace() const; + unsigned getAddrSpace() const { return AddrSpace; } /// Return a MachinePointerInfo record that refers to the constant pool. 
static MachinePointerInfo getConstantPool(MachineFunction &MF); Index: include/llvm/CodeGen/PseudoSourceValue.h =================================================================== --- include/llvm/CodeGen/PseudoSourceValue.h +++ include/llvm/CodeGen/PseudoSourceValue.h @@ -64,6 +64,9 @@ PSVKind kind() const { return Kind; } + bool hasAllocaAddrSpace() const { + return Kind == Stack || Kind == FixedStack; + } bool isStack() const { return Kind == Stack; } bool isGOT() const { return Kind == GOT; } bool isConstantPool() const { return Kind == ConstantPool; } Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -576,13 +576,6 @@ // MachineMemOperand Implementation //===----------------------------------------------------------------------===// -/// getAddrSpace - Return the LLVM IR address space number that this pointer -/// points into. -unsigned MachinePointerInfo::getAddrSpace() const { - if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; - return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); -} - /// isDereferenceable - Return true if V is always dereferenceable for /// Offset + Size byte. bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, @@ -608,7 +601,8 @@ /// the specified FrameIndex. 
MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, int FI, int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset, 0, + MF.getDataLayout().getAllocaAddrSpace()); } MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { @@ -620,9 +614,9 @@ } MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, - int64_t Offset, - uint8_t ID) { - return MachinePointerInfo(MF.getPSVManager().getStack(), Offset,ID); + int64_t Offset, uint8_t ID) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID, + MF.getDataLayout().getAllocaAddrSpace()); } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3474,6 +3474,7 @@ EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3504,7 +3505,7 @@ // Make sure the stack slot is also aligned for the register type. SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - + auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex(); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -3523,8 +3524,9 @@ MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. 
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo())); + Stores.push_back(DAG.getStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset))); // Increment the pointers. Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); @@ -3543,15 +3545,17 @@ // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT)); + Stores.push_back(DAG.getTruncStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), + LoadedVT); // Callers expect a MERGE_VALUES node. 
return std::make_pair(Load, TF); Index: test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" + +define amdgpu_kernel void @test_fn(<16 x double> addrspace(1)* %results, i32 %i) { +entry: + %sPrivateStorage = alloca [2 x <16 x double>], align 128, addrspace(5) + %ptr = getelementptr inbounds [2 x <16 x double>], [2 x <16 x double>] addrspace(5)* %sPrivateStorage, i32 0, i32 0, i32 %i + %a = bitcast double addrspace(5)* %ptr to <16 x double> addrspace(5)* + ; CHECK: buffer_load_dword + ; CHECK-NOT: flat_load_dword + %r6 = load <16 x double>, <16 x double> addrspace(5)* %a, align 8 + store <16 x double> %r6, <16 x double> addrspace(1)* %results, align 128 + ret void +} +