Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -193,13 +193,10 @@
   const SIMachineFunctionInfo &MFI = *MF->getInfo();
   if (!MFI.isEntryFunction())
     return;
-  if (IsaInfo::hasCodeObjectV3(getSTI()) &&
-      TM.getTargetTriple().getOS() == Triple::AMDHSA)
-    return;
 
   const GCNSubtarget &STM = MF->getSubtarget();
   const Function &F = MF->getFunction();
-  if (STM.isAmdCodeObjectV2(F) &&
+  if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
       (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
        F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
     amd_kernel_code_t KernelCode;
@@ -210,7 +207,8 @@
   if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
     return;
 
-  HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
+  if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS())
+    HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@@ -259,7 +257,7 @@
 
   const SIMachineFunctionInfo *MFI = MF->getInfo();
   const GCNSubtarget &STM = MF->getSubtarget();
-  if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
+  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
     SmallString<128> SymbolName;
     getNameWithPrefix(SymbolName, &MF->getFunction()),
     getTargetStreamer()->EmitAMDGPUSymbolType(
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -135,7 +135,7 @@
     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
   }
 
-  bool isAmdCodeObjectV2(const Function &F) const {
+  bool isAmdHsaOrMesa(const Function &F) const {
     return isAmdHsaOS() || isMesaKernel(F);
   }
 
@@ -202,7 +202,7 @@
   /// Returns the offset in bytes from the start of the input buffer
   /// of the first explicit kernel argument.
   unsigned getExplicitKernelArgOffset(const Function &F) const {
-    return isAmdCodeObjectV2(F) ? 0 : 36;
+    return isAmdHsaOrMesa(F) ? 0 : 36;
   }
 
   /// \returns Maximum number of work groups per compute unit supported by the
Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -289,7 +289,7 @@
     AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
 
   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
-  if (ST.isAmdCodeObjectV2(F)) {
+  if (ST.isAmdHsaOrMesa(F)) {
     PreloadedPrivateBufferReg = MFI->getPreloadedReg(
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
   }
@@ -308,7 +308,7 @@
   }
 
   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-    assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
+    assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
     MRI.addLiveIn(PreloadedPrivateBufferReg);
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
@@ -333,7 +333,7 @@
 
   bool CopyBuffer = ResourceRegUsed &&
     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
-    ST.isAmdCodeObjectV2(F) &&
+    ST.isAmdHsaOrMesa(F) &&
     ScratchRsrcReg != PreloadedPrivateBufferReg;
 
   // This needs to be careful of the copying order to avoid overwriting one of
@@ -433,7 +433,7 @@
   }
   if (ST.isMesaGfxShader(Fn)
       || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
-    assert(!ST.isAmdCodeObjectV2(Fn));
+    assert(!ST.isAmdHsaOrMesa(Fn));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1653,7 +1653,7 @@
   bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
 
   const GCNSubtarget &ST = MF.getSubtarget();
-  if (ST.isAmdCodeObjectV2(MF.getFunction())) {
+  if (ST.isAmdHsaOrMesa(MF.getFunction())) {
     if (RequiresStackAccess) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -4809,14 +4809,14 @@
 
   switch (IntrinsicID) {
   case Intrinsic::amdgcn_implicit_buffer_ptr: {
-    if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
+    if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
       return emitNonHSAIntrinsicError(DAG, DL, VT);
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
   }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
-    if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
+    if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
       DiagnosticInfoUnsupported BadIntrin(
           MF.getFunction(), "unsupported hsa intrinsic without hsa target",
           DL.getDebugLoc());
Index: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -137,8 +137,8 @@
     }
   }
 
-  bool IsCOV2 = ST.isAmdCodeObjectV2(F);
-  if (IsCOV2) {
+  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+  if (isAmdHsaOrMesa) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
 
@@ -158,7 +158,7 @@
   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     KernargSegmentPtr = true;
 
-  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
     // TODO: This could be refined a lot. The attribute is a poor way of
     // detecting calls that may require it before argument lowering.
     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
Index: llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll
@@ -2,6 +2,10 @@
 ; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | llvm-readobj -elf-output-style=GNU -notes -relocations -sections -symbols | FileCheck --check-prefixes=ALL-ELF,OSABI-AMDHSA-ELF %s
 
 ; ALL-ASM-LABEL: {{^}}fadd:
+
+; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
+; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
+
 ; OSABI-AMDHSA-ASM: s_endpgm
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6
@@ -16,6 +20,10 @@
 ; OSABI-AMDHSA-ASM: .text
 
 ; ALL-ASM-LABEL: {{^}}fsub:
+
+; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
+; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
+
 ; OSABI-AMDHSA-ASM: s_endpgm
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6