Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1178,7 +1178,7 @@ // FIXME: Should use getKernArgSize Out.kernarg_segment_byte_size = - STM.getKernArgSegmentSize(MF.getFunction(), MFI->getABIArgOffset()); + STM.getKernArgSegmentSize(MF.getFunction(), MFI->getExplicitKernArgSize()); Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; @@ -1205,7 +1205,7 @@ HSAMD::Kernel::CodeProps::Metadata HSACodeProps; HSACodeProps.mKernargSegmentSize = - STM.getKernArgSegmentSize(MF.getFunction(), MFI.getABIArgOffset()); + STM.getKernArgSegmentSize(MF.getFunction(), MFI.getExplicitKernArgSize()); HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; HSACodeProps.mKernargSegmentAlign = Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -291,7 +291,7 @@ /// Helper function that returns the byte offset of the given /// type of implicit parameter. - uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, + uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const; AMDGPUAS getAMDGPUAS() const { Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3979,9 +3979,13 @@ } uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( - const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const { - unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr(); - uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment); + const MachineFunction &MF, const ImplicitParameter Param) const { + const AMDGPUMachineFunction *MFI = MF.getInfo(); + const AMDGPUSubtarget &ST = MF.getSubtarget(); + unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); + unsigned Alignment = ST.getAlignmentForImplicitArgPtr(); + uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + + ExplicitArgOffset; switch (Param) { case GRID_DIM: return ArgOffset; Index: lib/Target/AMDGPU/AMDGPUMachineFunction.h =================================================================== --- lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -15,22 +15,20 @@ namespace llvm { +class AMDGPUSubtarget; + class AMDGPUMachineFunction : public MachineFunctionInfo { /// A map to keep track of local memory objects and their offsets within the /// local memory space. SmallDenseMap LocalMemoryObjects; protected: - uint64_t KernArgSize; + uint64_t ExplicitKernArgSize; unsigned MaxKernArgAlign; /// Number of bytes in the LDS that are being used. unsigned LDSSize; - // FIXME: This should probably be removed. - /// Start of implicit kernel args - unsigned ABIArgOffset; - // Kernels + shaders. i.e. functions called by the driver and not called // by other functions. bool IsEntryFunction; @@ -48,31 +46,23 @@ uint64_t allocateKernArg(uint64_t Size, unsigned Align) { assert(isPowerOf2_32(Align)); - KernArgSize = alignTo(KernArgSize, Align); + ExplicitKernArgSize = alignTo(ExplicitKernArgSize, Align); - uint64_t Result = KernArgSize; - KernArgSize += Size; + uint64_t Result = ExplicitKernArgSize; + ExplicitKernArgSize += Size; MaxKernArgAlign = std::max(Align, MaxKernArgAlign); return Result; } - uint64_t getKernArgSize() const { - return KernArgSize; + uint64_t getExplicitKernArgSize() const { + return ExplicitKernArgSize; } unsigned getMaxKernArgAlign() const { return MaxKernArgAlign; } - void setABIArgOffset(unsigned NewOffset) { - ABIArgOffset = NewOffset; - } - - unsigned getABIArgOffset() const { - return ABIArgOffset; - } - unsigned getLDSSize() const { return LDSSize; } Index: lib/Target/AMDGPU/AMDGPUMachineFunction.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -17,10 +17,9 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), LocalMemoryObjects(), - KernArgSize(0), + ExplicitKernArgSize(0), MaxKernArgAlign(0), LDSSize(0), - ABIArgOffset(0), IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath), MemoryBound(false), Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -560,7 +560,7 @@ case Intrinsic::r600_implicitarg_ptr: { MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS); - uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); + uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT); return DAG.getConstant(ByteOffset, DL, PtrVT); } case Intrinsic::r600_read_ngroups_x: @@ -1544,8 +1544,6 @@ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo(); - SmallVector LocalIns; if (AMDGPU::isShader(CallConv)) { @@ -1609,7 +1607,6 @@ // 4 is the preferred alignment for the CONSTANT memory space. InVals.push_back(Arg); - MFI->setABIArgOffset(Offset + MemVT.getStoreSize()); } return Chain; } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1078,8 +1078,8 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const { - auto MFI = DAG.getMachineFunction().getInfo(); - uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); + uint64_t Offset = getImplicitParameterOffset(DAG.getMachineFunction(), + FIRST_IMPLICIT); return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); } @@ -1749,7 +1749,6 @@ EVT MemVT = VA.getLocVT(); const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset(); - Info->setABIArgOffset(Offset + MemVT.getStoreSize()); unsigned Align = MinAlign(KernelArgBaseAlign, Offset); // The first 36 bytes of the input buffer contains information about