Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -145,7 +145,7 @@
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
+  if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
     AMDGPUTargetStreamer *TS =
         static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
     SmallString<128> SymbolName;
Index: lib/Target/AMDGPU/AMDGPUMachineFunction.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -30,7 +30,10 @@
   /// Start of implicit kernel args
   unsigned ABIArgOffset;
 
-  bool IsKernel;
+  // Kernels + shaders. i.e. functions called by the driver and not called
+  // by other functions.
+  bool IsEntryFunction;
+
   bool NoSignedZerosFPMath;
 
 public:
@@ -67,8 +70,8 @@
     return LDSSize;
   }
 
-  bool isKernel() const {
-    return IsKernel;
+  bool isEntryFunction() const {
+    return IsEntryFunction;
   }
 
   bool hasNoSignedZerosFPMath() const {
Index: lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,6 +12,21 @@
 
 using namespace llvm;
 
+static bool isEntryFunctionCC(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+
+  // TODO: Remove callable functions. Until then every CC is an entry point.
+  default:
+    return true;
+  }
+}
+
 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   MachineFunctionInfo(),
   LocalMemoryObjects(),
@@ -19,8 +34,7 @@
   MaxKernArgAlign(0),
   LDSSize(0),
   ABIArgOffset(0),
-  IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
-           MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL),
+  IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
   NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
   // except reserved size is not correctly aligned.