Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -196,7 +196,7 @@ const AMDGPUSubtarget &STM = MF->getSubtarget(); amd_kernel_code_t KernelCode; - if (STM.isAmdCodeObjectV2(*MF)) { + if (STM.isAmdCodeObjectV2(MF->getFunction())) { getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); } @@ -212,7 +212,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); - if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) { + if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) { SmallString<128> SymbolName; getNameWithPrefix(SymbolName, &MF->getFunction()), getTargetStreamer()->EmitAMDGPUSymbolType( @@ -1192,7 +1192,7 @@ // FIXME: Should use getKernArgSize Out.kernarg_segment_byte_size = - STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); + STM.getKernArgSegmentSize(MF.getFunction(), MFI->getABIArgOffset()); Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; @@ -1221,7 +1221,7 @@ HSAMD::Kernel::CodeProps::Metadata HSACodeProps; HSACodeProps.mKernargSegmentSize = - STM.getKernArgSegmentSize(MF, MFI.getABIArgOffset()); + STM.getKernArgSegmentSize(MF.getFunction(), MFI.getABIArgOffset()); HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; HSACodeProps.mKernargSegmentAlign = Index: lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -221,7 +221,7 @@ CCValAssign &VA = ArgLocs[i]; lowerParameter(MIRBuilder, Arg->getType(), VA.getLocMemOffset() + - Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]); + Subtarget->getExplicitKernelArgOffset(F), VRegs[i]); } return true; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -490,17 +490,17 @@ return HasUnpackedD16VMem; } - bool isMesaKernel(const MachineFunction &MF) const { - return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction().getCallingConv()); + bool isMesaKernel(const Function &F) const { + return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv()); } // Covers VS/PS/CS graphics shaders - bool isMesaGfxShader(const MachineFunction &MF) const { - return isMesa3DOS() && AMDGPU::isShader(MF.getFunction().getCallingConv()); + bool isMesaGfxShader(const Function &F) const { + return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); } - bool isAmdCodeObjectV2(const MachineFunction &MF) const { - return isAmdHsaOS() || isMesaKernel(MF); + bool isAmdCodeObjectV2(const Function &F) const { + return isAmdHsaOS() || isMesaKernel(F); } bool hasMad64_32() const { @@ -549,8 +549,8 @@ /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { - return isAmdCodeObjectV2(MF) ? 0 : 36; + unsigned getExplicitKernelArgOffset(const Function &F) const { + return isAmdCodeObjectV2(F) ? 0 : 36; } unsigned getAlignmentForImplicitArgPtr() const { @@ -559,11 +559,10 @@ /// \returns Number of bytes of arguments that are passed to a shader or /// kernel in addition to the explicit ones declared for the function. - unsigned getImplicitArgNumBytes(const MachineFunction &MF) const { - if (isMesaKernel(MF)) + unsigned getImplicitArgNumBytes(const Function &F) const { + if (isMesaKernel(F)) return 16; - return AMDGPU::getIntegerAttribute( - MF.getFunction(), "amdgpu-implicitarg-num-bytes", 0); + return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); } // Scratch is allocated in 256 dword per wave blocks for the entire @@ -860,7 +859,7 @@ return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; } - unsigned getKernArgSegmentSize(const MachineFunction &MF, + unsigned getKernArgSegmentSize(const Function &F, unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -412,9 +412,9 @@ return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, +unsigned SISubtarget::getKernArgSegmentSize(const Function &F, unsigned ExplicitArgBytes) const { - unsigned ImplicitBytes = getImplicitArgNumBytes(MF); + unsigned ImplicitBytes = getImplicitArgNumBytes(F); if (ImplicitBytes == 0) return ExplicitArgBytes; Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1596,7 +1596,8 @@ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) + + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -237,6 +237,7 @@ const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function &F = MF.getFunction(); // We need to do the replacement of the private segment buffer and wave offset // register even if there are no stack objects. There could be stores to undef @@ -288,7 +289,7 @@ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2(MF)) { + if (ST.isAmdCodeObjectV2(F)) { PreloadedPrivateBufferReg = MFI->getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); } @@ -307,7 +308,7 @@ } if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { - assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)); + assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F)); MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -332,7 +333,7 @@ bool CopyBuffer = ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister && - ST.isAmdCodeObjectV2(MF) && + ST.isAmdCodeObjectV2(F) && ScratchRsrcReg != PreloadedPrivateBufferReg; // This needs to be careful of the copying order to avoid overwriting one of @@ -370,6 +371,7 @@ const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); + const Function &Fn = MF.getFunction(); DebugLoc DL; if (ST.isAmdPalOS()) { @@ -420,8 +422,7 @@ MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable, 0, 0); - unsigned Offset - = MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; + unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) .addReg(Rsrc01) .addImm(Offset) // offset @@ -430,9 +431,9 @@ .addMemOperand(MMO); return; } - if (ST.isMesaGfxShader(MF) + if (ST.isMesaGfxShader(Fn) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) { - assert(!ST.isAmdCodeObjectV2(MF)); + assert(!ST.isAmdCodeObjectV2(Fn)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1450,7 +1450,7 @@ bool RequiresStackAccess = HasStackObjects || MFI.hasCalls(); const SISubtarget &ST = MF.getSubtarget(); - if (ST.isAmdCodeObjectV2(MF)) { + if (ST.isAmdCodeObjectV2(MF.getFunction())) { if (RequiresStackAccess) { // If we have stack objects, we unquestionably need the private buffer // resource. For the Code Object V2 ABI, this will be the first 4 user @@ -1562,12 +1562,12 @@ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); + const Function &Fn = MF.getFunction(); FunctionType *FType = MF.getFunction().getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo(); const SISubtarget &ST = MF.getSubtarget(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { - const Function &Fn = MF.getFunction(); DiagnosticInfoUnsupported NoGraphicsHSA( Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc()); DAG.getContext()->diagnose(NoGraphicsHSA); @@ -1678,7 +1678,7 @@ VT = Ins[i].VT; EVT MemVT = VA.getLocVT(); - const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(MF) + + const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(Fn) + VA.getLocMemOffset(); Info->setABIArgOffset(Offset + MemVT.getStoreSize()); @@ -1798,7 +1798,7 @@ auto &ArgUsageInfo = DAG.getPass()->getAnalysis(); - ArgUsageInfo.setFuncArgInfo(MF.getFunction(), Info->getArgInfo()); + ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo()); unsigned StackArgSize = CCInfo.getNextStackOffset(); Info->setBytesInStackArgArea(StackArgSize); @@ -4342,14 +4342,14 @@ switch (IntrinsicID) { case Intrinsic::amdgcn_implicit_buffer_ptr: { - if (getSubtarget()->isAmdCodeObjectV2(MF)) + if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction())) return emitNonHSAIntrinsicError(DAG, DL, VT); return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR); } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdCodeObjectV2(MF)) { + if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) { DiagnosticInfoUnsupported BadIntrin( MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -134,7 +134,7 @@ } } - bool IsCOV2 = ST.isAmdCodeObjectV2(MF); + bool IsCOV2 = ST.isAmdCodeObjectV2(F); if (IsCOV2) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -147,7 +147,7 @@ if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; - } else if (ST.isMesaGfxShader(MF)) { + } else if (ST.isMesaGfxShader(F)) { if (HasStackObjects || MaySpill) ImplicitBufferPtr = true; }