Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -334,6 +334,10 @@ GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; +def int_amdgcn_implicitarg_ptr : + GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + // __builtin_amdgcn_interp_p1 , , , def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -201,8 +201,9 @@ unsigned Reg, EVT VT) const; enum ImplicitParameter { - GRID_DIM, - GRID_OFFSET + FIRST_IMPLICIT, + GRID_DIM = FIRST_IMPLICIT, + GRID_OFFSET, }; /// \brief Helper function that returns the byte offset of the given Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -21,7 +21,9 @@ namespace llvm { class SITargetLowering final : public AMDGPUTargetLowering { - SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL, + SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, + unsigned Offset) const; + SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -534,24 +534,29 @@ return TargetLowering::isTypeDesirableForOp(Op, VT); } -SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, - const SDLoc &SL, SDValue Chain, - unsigned Offset, bool Signed) const { +SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG, + const SDLoc &SL, SDValue Chain, + unsigned Offset) const { const DataLayout &DL = DAG.getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const SIRegisterInfo *TRI = static_cast(Subtarget->getRegisterInfo()); unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); - Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(Offset, SL, PtrVT)); + return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Offset, SL, PtrVT)); +} +SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, + const SDLoc &SL, SDValue Chain, + unsigned Offset, bool Signed) const { + const DataLayout &DL = DAG.getDataLayout(); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); SDValue PtrOffset = DAG.getUNDEF(PtrVT); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); @@ -561,6 +566,7 @@ if (MemVT.isFloatingPoint()) ExtTy = ISD::EXTLOAD; + SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset); return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile @@ -1540,6 +1546,10 @@ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, TRI->getPreloadedValue(MF, Reg), VT); } + case Intrinsic::amdgcn_implicitarg_ptr: { + unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); + return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset); + } case Intrinsic::amdgcn_kernarg_segment_ptr: { unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -15,7 +15,20 @@ ret void } +; ALL-LABEL: {{^}}test_implicit: +; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 +; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 +define void @test_implicit(i32 addrspace(1)* %out) #1 { + %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() + %header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)* + %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10 + %value = load i32, i32 addrspace(2)* %gep + store i32 %value, i32 addrspace(1)* %out + ret void +} + declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0 +declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0 attributes #0 = { nounwind readnone } attributes #1 = { nounwind }