AMDGPU: split the kernarg pointer computation out of LowerParameter.

LowerParameterPtr builds the address (kernarg segment base + Offset) that
LowerParameter previously computed inline; LowerParameter now calls it.
On non-HSA targets, llvm.amdgcn.kernarg.segment.ptr is lowered through
LowerParameterPtr with getImplicitParameterOffset(MFI, GRID_DIM) as the
offset instead of returning the raw preloaded register.

Review notes:
  * The unconditional llvm::dbgs() trace (and the local that only fed it)
    has been dropped from the intrinsic lowering.
  * Blank context lines were re-inserted to agree with the hunk line
    counts; confirm them against the tree before applying.

Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -21,6 +21,8 @@
 namespace llvm {
 
 class SITargetLowering final : public AMDGPUTargetLowering {
+  SDValue LowerParameterPtr(SelectionDAG &DAG, SDLoc SL, SDValue Chain,
+                            unsigned Offset) const;
   SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
                          SDValue Chain, unsigned Offset, bool Signed) const;
 
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -622,24 +622,29 @@
   return TargetLowering::isTypeDesirableForOp(Op, VT);
 }
 
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
-                                         SDLoc SL, SDValue Chain,
-                                         unsigned Offset, bool Signed) const {
+SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
+                                            SDLoc SL, SDValue Chain,
+                                            unsigned Offset) const {
   const DataLayout &DL = DAG.getDataLayout();
   MachineFunction &MF = DAG.getMachineFunction();
   const SIRegisterInfo *TRI =
       static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
   unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
 
-  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
   MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
-  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
   SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
                                        MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
-  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
-                            DAG.getConstant(Offset, SL, PtrVT));
+  return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+                     DAG.getConstant(Offset, SL, PtrVT));
+}
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+                                         SDLoc SL, SDValue Chain,
+                                         unsigned Offset, bool Signed) const {
+  const DataLayout &DL = DAG.getDataLayout();
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+  MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
   SDValue PtrOffset = DAG.getUNDEF(PtrVT);
   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 
@@ -649,6 +654,7 @@
   if (MemVT.isFloatingPoint())
     ExtTy = ISD::EXTLOAD;
 
+  SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
   return DAG.getLoad(ISD::UNINDEXED, ExtTy,
                      VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
                      false, // isVolatile
@@ -1565,6 +1571,10 @@
                                 TRI->getPreloadedValue(MF, Reg), VT);
   }
   case Intrinsic::amdgcn_kernarg_segment_ptr: {
+    if (!Subtarget->isAmdHsaOS()) {
+      return LowerParameterPtr(DAG, DL, DAG.getEntryNode(),
+                               getImplicitParameterOffset(MFI, GRID_DIM));
+    }
     unsigned Reg
       = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
     return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -5,7 +5,8 @@
 ; HSA: enable_sgpr_kernarg_segment_ptr = 1
 
 ; HSA: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
-; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
+; In dwords: 10 + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
+; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
 define void @test(i32 addrspace(1)* %out) #1 {
   %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
   %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*