Index: lld/test/ELF/amdgpu-abi-version.s =================================================================== --- lld/test/ELF/amdgpu-abi-version.s +++ lld/test/ELF/amdgpu-abi-version.s @@ -1,5 +1,5 @@ # REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s -o %t.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o # RUN: ld.lld -shared %t.o -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck %s Index: lld/test/ELF/lto/amdgcn-oses.ll =================================================================== --- lld/test/ELF/lto/amdgcn-oses.ll +++ lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 1 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) Index: llvm/include/llvm/BinaryFormat/ELF.h =================================================================== --- llvm/include/llvm/BinaryFormat/ELF.h +++ llvm/include/llvm/BinaryFormat/ELF.h @@ -370,6 +370,7 @@ // was never defined for V1. ELFABIVERSION_AMDGPU_HSA_V2 = 0, ELFABIVERSION_AMDGPU_HSA_V3 = 1, + ELFABIVERSION_AMDGPU_HSA_V4 = 2 }; #define ELF_RELOC(name, value) name = value, @@ -738,10 +739,51 @@ // Indicates if the "xnack" target feature is enabled for all code contained // in the object. - EF_AMDGPU_XNACK = 0x100, - // Indicates if the "sram-ecc" target feature is enabled for all code + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2. + EF_AMDGPU_FEATURE_XNACK_V2 = 0x01, + // Indicates if the trap handler is enabled for all code contained + // in the object. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2. + EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02, + + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. 
+ // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3. + EF_AMDGPU_FEATURE_XNACK_V3 = 0x100, + // Indicates if the "sramecc" target feature is enabled for all code // contained in the object. - EF_AMDGPU_SRAM_ECC = 0x200, + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3. + EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200, + + // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. + EF_AMDGPU_FEATURE_XNACK_V4 = 0x300, + // XNACK is not supported. + EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000, + // XNACK is any/default/unspecified. + EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100, + // XNACK is off. + EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200, + // XNACK is on. + EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300, + + // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. + EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00, + // SRAMECC is not supported. + EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000, + // SRAMECC is any/default/unspecified. + EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400, + // SRAMECC is off. + EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800, + // SRAMECC is on. + EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00, }; // ELF Relocation types for AMDGPU @@ -1554,15 +1596,18 @@ SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON }; -// AMD specific notes. (Code Object V2) +// AMD vendor specific notes. (Code Object V2) enum { - // Note types with values between 0 and 9 (inclusive) are reserved. - NT_AMD_AMDGPU_HSA_METADATA = 10, - NT_AMD_AMDGPU_ISA = 11, - NT_AMD_AMDGPU_PAL_METADATA = 12 + NT_AMD_HSA_CODE_OBJECT_VERSION = 1, + NT_AMD_HSA_HSAIL = 2, + NT_AMD_HSA_ISA_VERSION = 3, + // Note types with values between 4 and 9 (inclusive) are reserved. + NT_AMD_HSA_METADATA = 10, + NT_AMD_HSA_ISA_NAME = 11, + NT_AMD_PAL_METADATA = 12 }; -// AMDGPU specific notes. 
(Code Object V3) +// AMDGPU vendor specific notes. (Code Object V3) enum { // Note types with values between 0 and 31 (inclusive) are reserved. NT_AMDGPU_METADATA = 32 Index: llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h =================================================================== --- llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -491,6 +491,9 @@ return nullptr; } + // For any initialization at the beginning of parsing. + virtual void onBeginOfFile() {} + // For any checks or cleanups at the end of parsing. virtual void onEndOfFile() {} }; Index: llvm/include/llvm/MC/MCSubtargetInfo.h =================================================================== --- llvm/include/llvm/MC/MCSubtargetInfo.h +++ llvm/include/llvm/MC/MCSubtargetInfo.h @@ -89,6 +89,7 @@ const unsigned *OperandCycles; // Itinerary operand cycles const unsigned *ForwardingPaths; FeatureBitset FeatureBits; // Feature bits for current CPU + FS + std::string FeatureString; // Feature string public: MCSubtargetInfo(const MCSubtargetInfo &) = default; @@ -112,6 +113,8 @@ FeatureBits = FeatureBits_; } + StringRef getFeatureString() const { return FeatureString; } + bool hasFeature(unsigned Feature) const { return FeatureBits[Feature]; } Index: llvm/include/llvm/Support/AMDGPUMetadata.h =================================================================== --- llvm/include/llvm/Support/AMDGPUMetadata.h +++ llvm/include/llvm/Support/AMDGPUMetadata.h @@ -29,10 +29,20 @@ //===----------------------------------------------------------------------===// namespace HSAMD { -/// HSA metadata major version. -constexpr uint32_t VersionMajor = 1; -/// HSA metadata minor version. -constexpr uint32_t VersionMinor = 0; +/// HSA metadata major version for code object V2. +constexpr uint32_t VersionMajorV2 = 1; +/// HSA metadata minor version for code object V2. +constexpr uint32_t VersionMinorV2 = 0; + +/// HSA metadata major version for code object V3. 
+constexpr uint32_t VersionMajorV3 = 1; +/// HSA metadata minor version for code object V3. +constexpr uint32_t VersionMinorV3 = 0; + +/// HSA metadata major version for code object V4. +constexpr uint32_t VersionMajorV4 = 1; +/// HSA metadata minor version for code object V4. +constexpr uint32_t VersionMinorV4 = 1; /// HSA metadata beginning assembler directive. constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata"; Index: llvm/include/llvm/Support/AMDHSAKernelDescriptor.h =================================================================== --- llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -165,7 +165,8 @@ struct kernel_descriptor_t { uint32_t group_segment_fixed_size; uint32_t private_segment_fixed_size; - uint8_t reserved0[8]; + uint32_t kernarg_size; + uint8_t reserved0[4]; int64_t kernel_code_entry_byte_offset; uint8_t reserved1[20]; uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+ @@ -178,7 +179,8 @@ enum : uint32_t { GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, - RESERVED0_OFFSET = 8, + KERNARG_SIZE_OFFSET = 8, + RESERVED0_OFFSET = 12, KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, RESERVED1_OFFSET = 24, COMPUTE_PGM_RSRC3_OFFSET = 44, @@ -197,6 +199,9 @@ static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, kernarg_size) == + KERNARG_SIZE_OFFSET, + "invalid offset for kernarg_size"); static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, "invalid offset for reserved0"); static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == Index: llvm/lib/MC/MCParser/AsmParser.cpp =================================================================== --- llvm/lib/MC/MCParser/AsmParser.cpp +++ llvm/lib/MC/MCParser/AsmParser.cpp @@ -919,6 +919,8 @@ (void)InsertResult; } + 
getTargetParser().onBeginOfFile(); + // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { ParseStatementInfo Info(&AsmStrRewrites); Index: llvm/lib/MC/MCParser/MasmParser.cpp =================================================================== --- llvm/lib/MC/MCParser/MasmParser.cpp +++ llvm/lib/MC/MCParser/MasmParser.cpp @@ -1235,6 +1235,8 @@ (void)InsertResult; } + getTargetParser().onBeginOfFile(); + // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof) || SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) { Index: llvm/lib/MC/MCSubtargetInfo.cpp =================================================================== --- llvm/lib/MC/MCSubtargetInfo.cpp +++ llvm/lib/MC/MCSubtargetInfo.cpp @@ -208,6 +208,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU, StringRef FS) { FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures); + FeatureString = std::string(FS); + if (!TuneCPU.empty()) CPUSchedModel = &getSchedModelForCPU(TuneCPU); else @@ -217,6 +219,7 @@ void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS) { FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures); + FeatureString = std::string(FS); } MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC, Index: llvm/lib/ObjectYAML/ELFYAML.cpp =================================================================== --- llvm/lib/ObjectYAML/ELFYAML.cpp +++ llvm/lib/ObjectYAML/ELFYAML.cpp @@ -155,9 +155,9 @@ ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS); ECase(NT_FREEBSD_PROCSTAT_AUXV); // AMD specific notes. (Code Object V2) - ECase(NT_AMD_AMDGPU_HSA_METADATA); - ECase(NT_AMD_AMDGPU_ISA); - ECase(NT_AMD_AMDGPU_PAL_METADATA); + ECase(NT_AMD_HSA_METADATA); + ECase(NT_AMD_HSA_ISA_NAME); + ECase(NT_AMD_PAL_METADATA); // AMDGPU specific notes. 
(Code Object V3) ECase(NT_AMDGPU_METADATA); #undef ECase @@ -544,8 +544,33 @@ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH); - BCase(EF_AMDGPU_XNACK); - BCase(EF_AMDGPU_SRAM_ECC); + switch (Object->Header.ABIVersion) { + default: + // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. + LLVM_FALLTHROUGH; + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + BCase(EF_AMDGPU_FEATURE_XNACK_V3); + BCase(EF_AMDGPU_FEATURE_SRAMECC_V3); + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4, + EF_AMDGPU_FEATURE_XNACK_V4); + BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4, + EF_AMDGPU_FEATURE_XNACK_V4); + BCaseMask(EF_AMDGPU_FEATURE_XNACK_OFF_V4, + EF_AMDGPU_FEATURE_XNACK_V4); + BCaseMask(EF_AMDGPU_FEATURE_XNACK_ON_V4, + EF_AMDGPU_FEATURE_XNACK_V4); + BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4, + EF_AMDGPU_FEATURE_SRAMECC_V4); + BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4, + EF_AMDGPU_FEATURE_SRAMECC_V4); + BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4, + EF_AMDGPU_FEATURE_SRAMECC_V4); + BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ON_V4, + EF_AMDGPU_FEATURE_SRAMECC_V4); + break; + } break; default: break; Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -56,6 +56,8 @@ int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; + void initializeTargetID(const Module &M); + SIProgramInfo CurrentProgramInfo; DenseMap CallGraphResourceInfo; Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -97,12 +97,14 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : 
AsmPrinter(TM, std::move(Streamer)) { + : AsmPrinter(TM, std::move(Streamer)) { if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { if (isHsaAbiVersion2(getGlobalSTI())) { HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2()); - } else { + } else if (isHsaAbiVersion3(getGlobalSTI())) { HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3()); + } else { + HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4()); } } } @@ -122,34 +124,34 @@ } void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) { - if (isHsaAbiVersion3(getGlobalSTI())) { - std::string ExpectedTarget; - raw_string_ostream ExpectedTargetOS(ExpectedTarget); - IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS); - - getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget); - } + // TODO: Which one is called first, emitStartOfAsmFile or + // emitFunctionBodyStart? + if (getTargetStreamer() && !getTargetStreamer()->getTargetID()) + initializeTargetID(M); if (TM.getTargetTriple().getOS() != Triple::AMDHSA && TM.getTargetTriple().getOS() != Triple::AMDPAL) return; + if (isHsaAbiVersion3Or4(getGlobalSTI())) + getTargetStreamer()->EmitDirectiveAMDGCNTarget(); + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - HSAMetadataStream->begin(M); + HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID()); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - if (isHsaAbiVersion3(getGlobalSTI())) + if (isHsaAbiVersion3Or4(getGlobalSTI())) return; - // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. + // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. if (TM.getTargetTriple().getOS() == Triple::AMDHSA) getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); - // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2. + // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2. 
IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU()); - getTargetStreamer()->EmitDirectiveHSACodeObjectISA( + getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2( Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); } @@ -159,15 +161,11 @@ return; if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - isHsaAbiVersion2(getGlobalSTI())) { - // Emit ISA Version (NT_AMD_AMDGPU_ISA). - std::string ISAVersionString; - raw_string_ostream ISAVersionStream(ISAVersionString); - IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream); - getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); - } + isHsaAbiVersion2(getGlobalSTI())) + getTargetStreamer()->EmitISAVersion(); // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). + // Emit HSA Metadata (NT_AMD_HSA_METADATA). if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { HSAMetadataStream->end(); bool Success = HSAMetadataStream->emitTo(*getTargetStreamer()); @@ -192,11 +190,37 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { const SIMachineFunctionInfo &MFI = *MF->getInfo(); + const GCNSubtarget &STM = MF->getSubtarget(); + const Function &F = MF->getFunction(); + + // TODO: Which one is called first, emitStartOfAsmFile or + // emitFunctionBodyStart? + if (getTargetStreamer() && !getTargetStreamer()->getTargetID()) + initializeTargetID(*F.getParent()); + + const auto &FunctionTargetID = STM.getTargetID(); + // Make sure function's xnack settings are compatible with module's + // xnack settings. + if (FunctionTargetID.isXnackSupported() && + FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any && + FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) { + OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) + + "' function does not match module xnack setting"); + return; + } + // Make sure function's sramecc settings are compatible with module's + // sramecc settings. 
+ if (FunctionTargetID.isSramEccSupported() && + FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any && + FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) { + OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) + + "' function does not match module sramecc setting"); + return; + } + if (!MFI.isEntryFunction()) return; - const GCNSubtarget &STM = MF->getSubtarget(); - const Function &F = MF->getFunction(); if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { @@ -232,26 +256,25 @@ if (ReadOnlySection.getAlignment() < 64) ReadOnlySection.setAlignment(Align(64)); - const MCSubtargetInfo &STI = MF->getSubtarget(); + const GCNSubtarget &STM = MF->getSubtarget(); SmallString<128> KernelName; getNameWithPrefix(KernelName, &MF->getFunction()); getTargetStreamer()->EmitAmdhsaKernelDescriptor( - STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), + STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), CurrentProgramInfo.NumVGPRsForWavesPerEU, CurrentProgramInfo.NumSGPRsForWavesPerEU - - IsaInfo::getNumExtraSGPRs(&STI, + IsaInfo::getNumExtraSGPRs(&STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), - CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, - hasXNACK(STI)); + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); Streamer.PopSection(); } void AMDGPUAsmPrinter::emitFunctionEntryLabel() { if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - isHsaAbiVersion3(getGlobalSTI())) { + isHsaAbiVersion3Or4(getGlobalSTI())) { AsmPrinter::emitFunctionEntryLabel(); return; } @@ -401,6 +424,8 @@ const MachineFunction &MF, const SIProgramInfo &PI) const { const GCNSubtarget &STM = MF.getSubtarget(); + const Function &F = MF.getFunction(); + amdhsa::kernel_descriptor_t KernelDescriptor; memset(&KernelDescriptor, 0x0, 
sizeof(KernelDescriptor)); @@ -410,6 +435,10 @@ KernelDescriptor.group_segment_fixed_size = PI.LDSSize; KernelDescriptor.private_segment_fixed_size = PI.ScratchSize; + + Align MaxKernArgAlign; + KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); + KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(); KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2; KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); @@ -598,6 +627,36 @@ return false; } +// TODO: Fold this into emitFunctionBodyStart. +void AMDGPUAsmPrinter::initializeTargetID(const Module &M) { + // In the beginning all features are either 'Any' or 'NotSupported', + // depending on global target features. This will cover empty modules. + getTargetStreamer()->initializeTargetID( + *getGlobalSTI(), getGlobalSTI()->getFeatureString()); + + // If module is empty, we are done. + if (M.empty()) + return; + + // If module is not empty, need to find first 'Off' or 'On' feature + // setting per feature from functions in module. 
+ for (auto &F : M) { + auto &TSTargetID = getTargetStreamer()->getTargetID(); + if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) && + (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff())) + break; + + const GCNSubtarget &STM = TM.getSubtarget(F); + const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID(); + if (TSTargetID->isXnackSupported()) + if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any) + TSTargetID->setXnackSetting(STMTargetID.getXnackSetting()); + if (TSTargetID->isSramEccSupported()) + if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any) + TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting()); + } +} + uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIInstrInfo *TII = STM.getInstrInfo(); @@ -632,8 +691,8 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( const GCNSubtarget &ST) const { - return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST, - UsesVCC, UsesFlatScratch); + return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs( + &ST, UsesVCC, UsesFlatScratch, ST.getTargetID().isXnackOnOrAny()); } int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs( Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/Alignment.h" @@ -40,7 +41,8 @@ virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; - virtual void begin(const Module &Mod) = 0; + virtual void begin(const Module 
&Mod, + const IsaInfo::AMDGPUTargetID &TargetID) = 0; virtual void end() = 0; @@ -48,8 +50,9 @@ const SIProgramInfo &ProgramInfo) = 0; }; -class MetadataStreamerV3 final : public MetadataStreamer { -private: +// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3. +class MetadataStreamerV3 : public MetadataStreamer { +protected: std::unique_ptr HSAMetadataDoc = std::make_unique(); @@ -108,7 +111,8 @@ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; - void begin(const Module &Mod) override; + void begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) override; void end() override; @@ -116,6 +120,21 @@ const SIProgramInfo &ProgramInfo) override; }; +// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4. +class MetadataStreamerV4 final : public MetadataStreamerV3 { + void emitVersion(); + + void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); + +public: + MetadataStreamerV4() = default; + ~MetadataStreamerV4() = default; + + void begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) override; +}; + +// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2. 
class MetadataStreamerV2 final : public MetadataStreamer { private: Metadata HSAMetadata; @@ -172,7 +191,8 @@ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; - void begin(const Module &Mod) override; + void begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) override; void end() override; Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -226,8 +226,8 @@ void MetadataStreamerV2::emitVersion() { auto &Version = HSAMetadata.mVersion; - Version.push_back(VersionMajor); - Version.push_back(VersionMinor); + Version.push_back(VersionMajorV2); + Version.push_back(VersionMinorV2); } void MetadataStreamerV2::emitPrintf(const Module &Mod) { @@ -435,7 +435,8 @@ return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); } -void MetadataStreamerV2::begin(const Module &Mod) { +void MetadataStreamerV2::begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) { emitVersion(); emitPrintf(Mod); } @@ -608,8 +609,8 @@ void MetadataStreamerV3::emitVersion() { auto Version = HSAMetadataDoc->getArrayNode(); - Version.push_back(Version.getDocument()->getNode(VersionMajor)); - Version.push_back(Version.getDocument()->getNode(VersionMinor)); + Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); getRootMetadata("amdhsa.version") = Version; } @@ -881,7 +882,8 @@ return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); } -void MetadataStreamerV3::begin(const Module &Mod) { +void MetadataStreamerV3::begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) { emitVersion(); emitPrintf(Mod); getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); @@ -921,6 +923,30 @@ Kernels.push_back(Kern); } 
+//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV4 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV4::emitVersion() { + auto Version = HSAMetadataDoc->getArrayNode(); + Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); + getRootMetadata("amdhsa.version") = Version; +} + +void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) { + getRootMetadata("amdhsa.target") = + HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); +} + +void MetadataStreamerV4::begin(const Module &Mod, + const IsaInfo::AMDGPUTargetID &TargetID) { + emitVersion(); + emitTargetID(TargetID); + emitPrintf(Mod); + getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); +} + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -183,6 +183,12 @@ bool legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeTrapHsaQueuePtr(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeTrapHsa(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeDebugTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -18,7 +18,9 @@ #include "AMDGPUInstrInfo.h" #include 
"AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -4516,27 +4518,55 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - // Is non-HSA path or trap-handler disabled? then, insert s_endpgm instruction - if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || - !ST.isTrapHandlerEnabled()) { - B.buildInstr(AMDGPU::S_ENDPGM).addImm(0); - } else { - // Pass queue pointer to trap handler as input, and insert trap instruction - // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi - MachineRegisterInfo &MRI = *B.getMRI(); + if (!ST.isTrapHandlerEnabled() || + ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) + return legalizeTrapEndpgm(MI, MRI, B); + + if (const auto &&HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) { + switch (HsaAbiVer.getValue()) { + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + return legalizeTrapHsaQueuePtr(MI, MRI, B); + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + return ST.supportsGetDoorbellID() ? 
+ legalizeTrapHsa(MI, MRI, B) : + legalizeTrapHsaQueuePtr(MI, MRI, B); + } + } - Register LiveIn = - MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); - if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) - return false; + llvm_unreachable("Unknown trap handler"); +} - Register SGPR01(AMDGPU::SGPR0_SGPR1); - B.buildCopy(SGPR01, LiveIn); - B.buildInstr(AMDGPU::S_TRAP) - .addImm(GCNSubtarget::TrapIDLLVMTrap) - .addReg(SGPR01, RegState::Implicit); - } +bool AMDGPULegalizerInfo::legalizeTrapEndpgm( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { + B.buildInstr(AMDGPU::S_ENDPGM).addImm(0); + MI.eraseFromParent(); + return true; +} +bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { + // Pass queue pointer to trap handler as input, and insert trap instruction + // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi + Register LiveIn = + MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); + if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) + return false; + + Register SGPR01(AMDGPU::SGPR0_SGPR1); + B.buildCopy(SGPR01, LiveIn); + B.buildInstr(AMDGPU::S_TRAP) + .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap)) + .addReg(SGPR01, RegState::Implicit); + + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeTrapHsa( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { + B.buildInstr(AMDGPU::S_TRAP) + .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap)); MI.eraseFromParent(); return true; } @@ -4545,8 +4575,8 @@ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { // Is non-HSA path or trap-handler disabled? 
then, report a warning // accordingly - if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || - !ST.isTrapHandlerEnabled()) { + if (!ST.isTrapHandlerEnabled() || + ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) { DiagnosticInfoUnsupported NoTrap(B.getMF().getFunction(), "debugtrap handler not supported", MI.getDebugLoc(), DS_Warning); @@ -4554,7 +4584,8 @@ Ctx.diagnose(NoTrap); } else { // Insert debug-trap instruction - B.buildInstr(AMDGPU::S_TRAP).addImm(GCNSubtarget::TrapIDLLVMDebugTrap); + B.buildInstr(AMDGPU::S_TRAP) + .addImm(static_cast(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap)); } MI.eraseFromParent(); Index: llvm/lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPTNote.h +++ llvm/lib/Target/AMDGPU/AMDGPUPTNote.h @@ -26,22 +26,6 @@ const char NoteNameV2[] = "AMD"; const char NoteNameV3[] = "AMDGPU"; -// TODO: Remove this file once we drop code object v2. -enum NoteType{ - NT_AMDGPU_HSA_RESERVED_0 = 0, - NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, - NT_AMDGPU_HSA_HSAIL = 2, - NT_AMDGPU_HSA_ISA = 3, - NT_AMDGPU_HSA_PRODUCER = 4, - NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, - NT_AMDGPU_HSA_EXTENSION = 6, - NT_AMDGPU_HSA_RESERVED_7 = 7, - NT_AMDGPU_HSA_RESERVED_8 = 8, - NT_AMDGPU_HSA_RESERVED_9 = 9, - NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, - NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 -}; - } // End namespace ElfNote } // End namespace AMDGPU } // End namespace llvm Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1223,7 +1223,8 @@ bool ParseDirectiveHSACodeObjectISA(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; + // TODO: Possibly make 
subtargetHasRegister const. + bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); bool ParseDirectiveAMDGPUHsaKernel(); bool ParseDirectiveISAVersion(); @@ -1302,7 +1303,7 @@ // AsmParser::parseDirectiveSet() cannot be specialized for specific target. AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); MCContext &Ctx = getContext(); - if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); @@ -1319,7 +1320,7 @@ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } - if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { initializeGprCountSymbol(IS_VGPR); initializeGprCountSymbol(IS_SGPR); } else @@ -1327,10 +1328,6 @@ } } - bool hasXNACK() const { - return AMDGPU::hasXNACK(getSTI()); - } - bool hasMIMG_R128() const { return AMDGPU::hasMIMG_R128(getSTI()); } @@ -1589,6 +1586,8 @@ void lex(); public: + void onBeginOfFile() override; + OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); @@ -2723,7 +2722,7 @@ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { return nullptr; } - if (isHsaAbiVersion3(&getSTI())) { + if (isHsaAbiVersion3Or4(&getSTI())) { if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) return nullptr; } else @@ -4346,21 +4345,19 @@ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) return TokError("directive only supported for amdgcn architecture"); - std::string Target; - - SMLoc TargetStart = getLoc(); - if (getParser().parseEscapedString(Target)) + std::string TargetIDDirective; + SMLoc TargetStart = getTok().getLoc(); + if (getParser().parseEscapedString(TargetIDDirective)) return 
true; - SMRange TargetRange = SMRange(TargetStart, getLoc()); - - std::string ExpectedTarget; - raw_string_ostream ExpectedTargetOS(ExpectedTarget); - IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); - if (Target != ExpectedTargetOS.str()) - return Error(TargetRange.Start, "target must match options", TargetRange); + SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); + if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) + return getParser().Error(TargetRange.Start, + (Twine(".amdgcn_target directive's target id ") + + Twine(TargetIDDirective) + + Twine(" does not match the specified target id ") + + Twine(getTargetStreamer().getTargetID()->toString())).str()); - getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); return false; } @@ -4433,7 +4430,6 @@ unsigned UserSGPRCount = 0; bool ReserveVCC = true; bool ReserveFlatScr = true; - bool ReserveXNACK = hasXNACK(); Optional EnableWavefrontSize32; while (true) { @@ -4476,6 +4472,10 @@ if (!isUInt(Val)) return OutOfRangeError(ValRange); KD.private_segment_fixed_size = Val; + } else if (ID == ".amdhsa_kernarg_size") { + if (!isUInt(Val)) + return OutOfRangeError(ValRange); + KD.kernarg_size = Val; } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, @@ -4575,7 +4575,9 @@ return Error(IDRange.Start, "directive requires gfx8+", IDRange); if (!isUInt<1>(Val)) return OutOfRangeError(ValRange); - ReserveXNACK = Val; + if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) + return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", + IDRange); } else if (ID == ".amdhsa_float_round_mode_32") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); @@ -4666,7 +4668,8 @@ unsigned VGPRBlocks; unsigned SGPRBlocks; if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, - ReserveXNACK, 
EnableWavefrontSize32, NextFreeVGPR, + getTargetStreamer().getTargetID()->isXnackOnOrAny(), + EnableWavefrontSize32, NextFreeVGPR, VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, SGPRBlocks)) return true; @@ -4703,7 +4706,7 @@ getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, - ReserveFlatScr, ReserveXNACK); + ReserveFlatScr); return false; } @@ -4729,9 +4732,9 @@ // targeted GPU. if (isToken(AsmToken::EndOfStatement)) { AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); - getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, - ISA.Stepping, - "AMD", "AMDGPU"); + getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, + ISA.Stepping, + "AMD", "AMDGPU"); return false; } @@ -4756,8 +4759,8 @@ if (!parseString(ArchName, "invalid arch name")) return true; - getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, - VendorName, ArchName); + getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, + VendorName, ArchName); return false; } @@ -4866,19 +4869,11 @@ "architectures"); } - auto ISAVersionStringFromASM = getToken().getStringContents(); - - std::string ISAVersionStringFromSTI; - raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); - IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); + auto TargetIDDirective = getLexer().getTok().getStringContents(); + if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) + return Error(getParser().getTok().getLoc(), "target id must match options"); - if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { - return Error(getLoc(), - ".amd_amdgpu_isa directive does not match triple and/or mcpu " - "arguments specified through the command line"); - } - - getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); + getTargetStreamer().EmitISAVersion(); Lex(); return false; @@ -4888,7 +4883,7 @@ const char 
*AssemblerDirectiveBegin; const char *AssemblerDirectiveEnd; std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = - isHsaAbiVersion3(&getSTI()) + isHsaAbiVersion3Or4(&getSTI()) ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, HSAMD::V3::AssemblerDirectiveEnd) : std::make_tuple(HSAMD::AssemblerDirectiveBegin, @@ -4905,7 +4900,7 @@ HSAMetadataString)) return true; - if (isHsaAbiVersion3(&getSTI())) { + if (isHsaAbiVersion3Or4(&getSTI())) { if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) return Error(getLoc(), "invalid HSA metadata"); } else { @@ -5055,12 +5050,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if (isHsaAbiVersion3(&getSTI())) { - if (IDVal == ".amdgcn_target") - return ParseDirectiveAMDGCNTarget(); - + if (isHsaAbiVersion3Or4(&getSTI())) { if (IDVal == ".amdhsa_kernel") - return ParseDirectiveAMDHSAKernel(); + return ParseDirectiveAMDHSAKernel(); // TODO: Restructure/combine with PAL metadata directive. 
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) @@ -5085,6 +5077,9 @@ return ParseDirectiveHSAMetadata(); } + if (IDVal == ".amdgcn_target") + return ParseDirectiveAMDGCNTarget(); + if (IDVal == ".amdgpu_lds") return ParseDirectiveAMDGPULDS(); @@ -5098,7 +5093,7 @@ } bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, - unsigned RegNo) const { + unsigned RegNo) { for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); R.isValid(); ++R) { @@ -5130,7 +5125,7 @@ case AMDGPU::XNACK_MASK: case AMDGPU::XNACK_MASK_LO: case AMDGPU::XNACK_MASK_HI: - return (isVI() || isGFX9()) && hasXNACK(); + return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); case AMDGPU::SGPR_NULL: return isGFX10Plus(); default: @@ -7138,6 +7133,17 @@ {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} }; +void AMDGPUAsmParser::onBeginOfFile() { + if (getSTI().getTargetTriple().getArch() == Triple::r600) + return; + + if (!getTargetStreamer().getTargetID()) + getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); + + if (isHsaAbiVersion3Or4(&getSTI())) + getTargetStreamer().EmitDirectiveAMDGCNTarget(); +} + OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { OperandMatchResultTy res = parseOptionalOpr(Operands); Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1623,7 +1623,6 @@ uint16_t TwoByteBuffer = 0; uint32_t FourByteBuffer = 0; - uint64_t EightByteBuffer = 0; StringRef ReservedBytes; StringRef Indent = "\t"; @@ -1644,11 +1643,19 @@ << FourByteBuffer << '\n'; return MCDisassembler::Success; + case amdhsa::KERNARG_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_kernarg_size " + << FourByteBuffer << '\n'; + return 
MCDisassembler::Success; + case amdhsa::RESERVED0_OFFSET: - // 8 reserved bytes, must be 0. - EightByteBuffer = DE.getU64(Cursor); - if (EightByteBuffer) { - return MCDisassembler::Fail; + // 4 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 4); + for (int I = 0; I < 4; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } } return MCDisassembler::Success; Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -41,24 +41,16 @@ using AMDGPUSubtarget::getMaxWavesPerEU; public: - enum TrapHandlerAbi { - TrapHandlerAbiNone = 0, - TrapHandlerAbiHsa = 1 + // Following 2 enums are documented at: + // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi + enum class TrapHandlerAbi { + NONE = 0x00, + AMDHSA = 0x01, }; - enum TrapID { - TrapIDHardwareReserved = 0, - TrapIDHSADebugTrap = 1, - TrapIDLLVMTrap = 2, - TrapIDLLVMDebugTrap = 3, - TrapIDDebugBreakpoint = 7, - TrapIDDebugReserved8 = 8, - TrapIDDebugReservedFE = 0xfe, - TrapIDDebugReservedFF = 0xff - }; - - enum TrapRegValues { - LLVMTrapHandlerRegValue = 1 + enum class TrapID { + LLVMAMDHSATrap = 0x02, + LLVMAMDHSADebugTrap = 0x03, }; private: @@ -253,6 +245,10 @@ return RegBankInfo.get(); } + const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { + return TargetID; + } + // Nothing implemented, just prevent crashes on use. const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; @@ -386,7 +382,12 @@ } TrapHandlerAbi getTrapHandlerAbi() const { - return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; + return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; + } + + bool supportsGetDoorbellID() const { + // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. + return getGeneration() >= GFX9; } /// True if the offset field of DS instructions works as expected. 
On SI, the Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H +#include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDGPUPALMetadata.h" #include "llvm/MC/MCStreamer.h" @@ -39,6 +40,9 @@ AMDGPUPALMetadata PALMetadata; protected: + // TODO: Move HSAMetadataStream to AMDGPUTargetStreamer. + Optional TargetID; + MCContext &getContext() const { return Streamer.getContext(); } public: @@ -46,15 +50,15 @@ AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; } - virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; + virtual void EmitDirectiveAMDGCNTarget() = 0; virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) = 0; - virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName) = 0; + virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) = 0; virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0; @@ -64,7 +68,7 @@ Align Alignment) = 0; /// \returns True on success, false on failure. - virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; + virtual bool EmitISAVersion() = 0; /// \returns True on success, false on failure. 
virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString); @@ -90,11 +94,27 @@ virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - bool ReserveXNACK) = 0; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) = 0; static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); + + const Optional &getTargetID() const { + return TargetID; + } + Optional &getTargetID() { + return TargetID; + } + void initializeTargetID(const MCSubtargetInfo &STI) { + assert(TargetID == None && "TargetID can only be initialized once"); + TargetID.emplace(STI); + } + void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { + initializeTargetID(STI); + + assert(getTargetID() != None && "TargetID is None"); + getTargetID()->setTargetIDFromFeaturesString(FeatureString); + } }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { @@ -104,14 +124,14 @@ void finish() override; - void EmitDirectiveAMDGCNTarget(StringRef Target) override; + void EmitDirectiveAMDGCNTarget() override; void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override; - void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, - uint32_t Stepping, StringRef VendorName, - StringRef ArchName) override; + void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, + uint32_t Stepping, StringRef VendorName, + StringRef ArchName) override; void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; @@ -120,7 +140,7 @@ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override; /// \returns True on success, false on failure. - bool EmitISAVersion(StringRef IsaVersionString) override; + bool EmitISAVersion() override; /// \returns True on success, false on failure. 
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override; @@ -134,17 +154,29 @@ void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - bool ReserveXNACK) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { + const MCSubtargetInfo &STI; MCStreamer &Streamer; - Triple::OSType Os; void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, function_ref EmitDesc); + unsigned getEFlags(); + + unsigned getEFlagsR600(); + unsigned getEFlagsAMDGCN(); + + unsigned getEFlagsUnknownOS(); + unsigned getEFlagsAMDHSA(); + unsigned getEFlagsAMDPAL(); + unsigned getEFlagsMesa3D(); + + unsigned getEFlagsV3(); + unsigned getEFlagsV4(); + public: AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); @@ -152,14 +184,14 @@ void finish() override; - void EmitDirectiveAMDGCNTarget(StringRef Target) override; + void EmitDirectiveAMDGCNTarget() override; void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) override; - void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, - uint32_t Stepping, StringRef VendorName, - StringRef ArchName) override; + void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, + uint32_t Stepping, StringRef VendorName, + StringRef ArchName) override; void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; @@ -168,7 +200,7 @@ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override; /// \returns True on success, false on failure. - bool EmitISAVersion(StringRef IsaVersionString) override; + bool EmitISAVersion() override; /// \returns True on success, false on failure. 
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override; @@ -182,8 +214,7 @@ void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - bool ReserveXNACK) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; }; } Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -31,6 +31,20 @@ // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// +static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor, + uint32_t &Stepping, bool Sramecc, bool Xnack) { + if (Major == 9 && Minor == 0) { + switch (Stepping) { + case 0: + case 2: + case 4: + case 6: + if (Xnack) + Stepping++; + } + } +} + bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) @@ -182,8 +196,8 @@ getPALMetadata()->reset(); } -void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { - OS << "\t.amdgcn_target \"" << Target << "\"\n"; +void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() { + OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n"; } void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion( @@ -193,15 +207,14 @@ } void -AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, - uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName) { - OS << "\t.hsa_code_object_isa " << - Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << - ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; - 
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major, + uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) { + convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny()); + OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << "," + << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; } void @@ -227,8 +240,8 @@ << Alignment.value() << '\n'; } -bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { - OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; +bool AMDGPUTargetAsmStreamer::EmitISAVersion() { + OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n"; return true; } @@ -279,7 +292,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, - bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { + bool ReserveVCC, bool ReserveFlatScr) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -292,6 +305,7 @@ << '\n'; OS << "\t\t.amdhsa_private_segment_fixed_size " << KD.private_segment_fixed_size << '\n'; + OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n'; PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, kernel_code_properties, @@ -352,8 +366,20 @@ OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; if (IVersion.Major >= 7 && !ReserveFlatScr) OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; - if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) - OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; + + if (const auto &&HsaAbiVer = getHsaAbiVersion(&STI)) { + switch (HsaAbiVer.getValue()) { + default: + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + 
if (getTargetID()->isXnackSupported()) + OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; + break; + } + } PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, compute_pgm_rsrc1, @@ -426,23 +452,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) - : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) { - MCAssembler &MCA = getStreamer().getAssembler(); - unsigned EFlags = MCA.getELFHeaderEFlags(); - - EFlags &= ~ELF::EF_AMDGPU_MACH; - EFlags |= getElfMach(STI.getCPU()); - - EFlags &= ~ELF::EF_AMDGPU_XNACK; - if (AMDGPU::hasXNACK(STI)) - EFlags |= ELF::EF_AMDGPU_XNACK; - - EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC; - if (AMDGPU::hasSRAMECC(STI)) - EFlags |= ELF::EF_AMDGPU_SRAM_ECC; - - MCA.setELFHeaderEFlags(EFlags); -} + : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {} MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { return static_cast(Streamer); @@ -452,6 +462,9 @@ // We use it for emitting the accumulated PAL metadata as a .note record. // The PAL metadata is reset after it is emitted. 
void AMDGPUTargetELFStreamer::finish() { + MCAssembler &MCA = getStreamer().getAssembler(); + MCA.setELFHeaderEFlags(getEFlags()); + std::string Blob; const char *Vendor = getPALMetadata()->getVendor(); unsigned Type = getPALMetadata()->getType(); @@ -477,7 +490,7 @@ unsigned NoteFlags = 0; // TODO Apparently, this is currently needed for OpenCL as mentioned in // https://reviews.llvm.org/D74995 - if (Os == Triple::AMDHSA) + if (STI.getTargetTriple().getOS() == Triple::AMDHSA) NoteFlags = ELF::SHF_ALLOC; S.PushSection(); @@ -493,24 +506,150 @@ S.PopSection(); } -void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {} +unsigned AMDGPUTargetELFStreamer::getEFlags() { + switch (STI.getTargetTriple().getArch()) { + default: + llvm_unreachable("Unsupported Arch"); + case Triple::r600: + return getEFlagsR600(); + case Triple::amdgcn: + return getEFlagsAMDGCN(); + } +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsR600() { + assert(STI.getTargetTriple().getArch() == Triple::r600); + + return getElfMach(STI.getCPU()); +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() { + assert(STI.getTargetTriple().getArch() == Triple::amdgcn); + + switch (STI.getTargetTriple().getOS()) { + default: + // TODO: Why are some tests have "mingw" listed as OS? + // llvm_unreachable("Unsupported OS"); + case Triple::UnknownOS: + return getEFlagsUnknownOS(); + case Triple::AMDHSA: + return getEFlagsAMDHSA(); + case Triple::AMDPAL: + return getEFlagsAMDPAL(); + case Triple::Mesa3D: + return getEFlagsMesa3D(); + } +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { + // TODO: Why are some tests have "mingw" listed as OS? 
+ // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS); + + return getEFlagsV3(); +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { + assert(STI.getTargetTriple().getOS() == Triple::AMDHSA); + + if (const auto &&HsaAbiVer = getHsaAbiVersion(&STI)) { + switch (HsaAbiVer.getValue()) { + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + return getEFlagsV3(); + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + return getEFlagsV4(); + } + } + + llvm_unreachable("HSA OS ABI Version identification must be defined"); +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() { + assert(STI.getTargetTriple().getOS() == Triple::AMDPAL); + + return getEFlagsV3(); +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() { + assert(STI.getTargetTriple().getOS() == Triple::Mesa3D); + + return getEFlagsV3(); +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsV3() { + unsigned EFlagsV3 = 0; + + // mach. + EFlagsV3 |= getElfMach(STI.getCPU()); + + // xnack. + if (getTargetID()->isXnackOnOrAny()) + EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3; + // sramecc. + if (getTargetID()->isSramEccOnOrAny()) + EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3; + + return EFlagsV3; +} + +unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { + unsigned EFlagsV4 = 0; + + // mach. + EFlagsV4 |= getElfMach(STI.getCPU()); + + // xnack. + switch (getTargetID()->getXnackSetting()) { + case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::Any: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::Off: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::On: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4; + break; + } + // sramecc. 
+ switch (getTargetID()->getSramEccSetting()) { + case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::Any: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::Off: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4; + break; + case AMDGPU::IsaInfo::TargetIDSetting::On: + EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4; + break; + } + + return EFlagsV4; +} + +void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( uint32_t Major, uint32_t Minor) { EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), - ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { + ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { OS.emitInt32(Major); OS.emitInt32(Minor); }); } void -AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, - uint32_t Minor, - uint32_t Stepping, - StringRef VendorName, - StringRef ArchName) { +AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major, + uint32_t Minor, + uint32_t Stepping, + StringRef VendorName, + StringRef ArchName) { uint16_t VendorNameSize = VendorName.size() + 1; uint16_t ArchNameSize = ArchName.size() + 1; @@ -518,8 +657,9 @@ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + VendorNameSize + ArchNameSize; + convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny()); EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), - ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { + ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) { OS.emitInt16(VendorNameSize); OS.emitInt16(ArchNameSize); OS.emitInt32(Major); @@ -567,7 +707,7 @@ SymbolELF->setSize(MCConstantExpr::create(Size, getContext())); } -bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { +bool 
AMDGPUTargetELFStreamer::EmitISAVersion() { // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. auto &Context = getContext(); @@ -577,10 +717,10 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME, [&](MCELFStreamer &OS) { OS.emitLabel(DescBegin); - OS.emitBytes(IsaVersionString); + OS.emitBytes(getTargetID()->toString()); OS.emitLabel(DescEnd); }); return true; @@ -628,7 +768,7 @@ MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescBegin, Context), Context); - EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, + EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA, [&](MCELFStreamer &OS) { OS.emitLabel(DescBegin); OS.emitBytes(HSAMetadataString); @@ -660,8 +800,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, - bool ReserveXNACK) { + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); @@ -688,8 +827,11 @@ Streamer.emitLabel(KernelDescriptorSymbol); Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size); Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size); + Streamer.emitInt32(KernelDescriptor.kernarg_size); + for (uint8_t Res : KernelDescriptor.reserved0) Streamer.emitInt8(Res); + // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. 
The // expression being created is: // (start of kernel code) - (start of kernel descriptor) Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -144,7 +144,11 @@ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const; SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -19,6 +19,7 @@ #include "SIRegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" @@ -5149,12 +5150,35 @@ } SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { + if (!Subtarget->isTrapHandlerEnabled() || + Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) + return lowerTrapEndpgm(Op, DAG); + + if (const auto &&HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) { + switch (HsaAbiVer.getValue()) { + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + return lowerTrapHsaQueuePtr(Op, DAG); + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + return Subtarget->supportsGetDoorbellID() ? 
+ lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); + } + } + + llvm_unreachable("Unknown trap handler"); +} + +SDValue SITargetLowering::lowerTrapEndpgm( + SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Chain = Op.getOperand(0); + return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); +} - if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || - !Subtarget->isTrapHandlerEnabled()) - return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); +SDValue SITargetLowering::lowerTrapHsaQueuePtr( + SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Chain = Op.getOperand(0); MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *Info = MF.getInfo(); @@ -5165,22 +5189,37 @@ SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, QueuePtr, SDValue()); + + uint64_t TrapID = static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap); SDValue Ops[] = { ToReg, - DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16), + DAG.getTargetConstant(TrapID, SL, MVT::i16), SGPR01, ToReg.getValue(1) }; return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); } +SDValue SITargetLowering::lowerTrapHsa( + SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Chain = Op.getOperand(0); + + uint64_t TrapID = static_cast(GCNSubtarget::TrapID::LLVMAMDHSATrap); + SDValue Ops[] = { + Chain, + DAG.getTargetConstant(TrapID, SL, MVT::i16) + }; + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); +} + SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Chain = Op.getOperand(0); MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || - !Subtarget->isTrapHandlerEnabled()) { + if (!Subtarget->isTrapHandlerEnabled() || + Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) { DiagnosticInfoUnsupported 
NoTrap(MF.getFunction(), "debugtrap handler not supported", Op.getDebugLoc(), @@ -5190,9 +5229,10 @@ return Chain; } + uint64_t TrapID = static_cast(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap); SDValue Ops[] = { Chain, - DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16) + DAG.getTargetConstant(TrapID, SL, MVT::i16) }; return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); } Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1377,11 +1377,6 @@ int NONE = 0; } -def TRAPID{ - int LLVM_TRAP = 2; - int LLVM_DEBUG_TRAP = 3; -} - def HWREG { int MODE = 1; int STATUS = 2; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -44,6 +44,12 @@ /// \returns True if HSA OS ABI Version identification is 3, /// false otherwise. bool isHsaAbiVersion3(const MCSubtargetInfo *STI); +/// \returns True if HSA OS ABI Version identification is 4, +/// false otherwise. +bool isHsaAbiVersion4(const MCSubtargetInfo *STI); +/// \returns True if HSA OS ABI Version identification is 3 or 4, +/// false otherwise. +bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI); struct GcnBufferFormatInfo { unsigned Format; @@ -78,6 +84,7 @@ class AMDGPUTargetID { private: + const MCSubtargetInfo &STI; TargetIDSetting XnackSetting; TargetIDSetting SramEccSetting; @@ -145,10 +152,10 @@ void setTargetIDFromFeaturesString(StringRef FS); void setTargetIDFromTargetIDStream(StringRef TargetID); -}; -/// Streams isa version string for given subtarget \p STI into \p Stream. -void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); + /// \returns String representation of an object. 
+ std::string toString() const; +}; /// \returns Wavefront size for given subtarget \p STI. unsigned getWavefrontSize(const MCSubtargetInfo *STI); Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -30,7 +30,8 @@ static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion( "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3)); + llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4), + llvm::cl::ZeroOrMore); namespace { @@ -96,8 +97,11 @@ return ELF::ELFABIVERSION_AMDGPU_HSA_V2; case 3: return ELF::ELFABIVERSION_AMDGPU_HSA_V3; + case 4: + return ELF::ELFABIVERSION_AMDGPU_HSA_V4; default: - return ELF::ELFABIVERSION_AMDGPU_HSA_V3; + report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") + + Twine(AmdhsaCodeObjectVersion)); } } @@ -113,6 +117,16 @@ return false; } +bool isHsaAbiVersion4(const MCSubtargetInfo *STI) { + if (const auto &&HsaAbiVer = getHsaAbiVersion(STI)) + return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V4; + return false; +} + +bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) { + return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI); +} + #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL @@ -247,7 +261,8 @@ namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) - : XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) { + : STI(STI), XnackSetting(TargetIDSetting::Any), + SramEccSetting(TargetIDSetting::Any) { if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) XnackSetting = TargetIDSetting::Unsupported; if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) @@ -334,25 +349,104 @@ } } -void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { - auto TargetTriple = 
STI->getTargetTriple(); - auto Version = getIsaVersion(STI->getCPU()); +std::string AMDGPUTargetID::toString() const { + std::string StringRep = ""; + raw_string_ostream StreamRep(StringRep); + + auto TargetTriple = STI.getTargetTriple(); + auto Version = getIsaVersion(STI.getCPU()); - Stream << TargetTriple.getArchName() << '-' - << TargetTriple.getVendorName() << '-' - << TargetTriple.getOSName() << '-' - << TargetTriple.getEnvironmentName() << '-' - << "gfx" - << Version.Major - << Version.Minor - << hexdigit(Version.Stepping, true); + StreamRep << TargetTriple.getArchName() << '-' + << TargetTriple.getVendorName() << '-' + << TargetTriple.getOSName() << '-' + << TargetTriple.getEnvironmentName() << '-'; + + std::string Processor = ""; + // TODO: Following else statement is present here because we used various + // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). + // Remove once all aliases are removed from GCNProcessors.td. + if (Version.Major >= 9) + Processor = STI.getCPU().str(); + else + Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + + Twine(Version.Stepping)) + .str(); + + std::string Features = ""; + if (const auto &&HsaAbiVersion = getHsaAbiVersion(&STI)) { + switch (HsaAbiVersion.getValue()) { + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + // Code object V2 only supported specific processors and had fixed + // settings for the XNACK. 
+ if (Processor == "gfx600") { + } else if (Processor == "gfx601") { + } else if (Processor == "gfx602") { + } else if (Processor == "gfx700") { + } else if (Processor == "gfx701") { + } else if (Processor == "gfx702") { + } else if (Processor == "gfx703") { + } else if (Processor == "gfx704") { + } else if (Processor == "gfx705") { + } else if (Processor == "gfx801") { + if (!isXnackOnOrAny()) + report_fatal_error( + "AMD GPU code object V2 does not support processor " + Processor + + " without XNACK"); + } else if (Processor == "gfx802") { + } else if (Processor == "gfx803") { + } else if (Processor == "gfx805") { + } else if (Processor == "gfx810") { + if (!isXnackOnOrAny()) + report_fatal_error( + "AMD GPU code object V2 does not support processor " + Processor + + " without XNACK"); + } else if (Processor == "gfx900") { + if (isXnackOnOrAny()) + Processor = "gfx901"; + } else if (Processor == "gfx902") { + if (isXnackOnOrAny()) + Processor = "gfx903"; + } else if (Processor == "gfx904") { + if (isXnackOnOrAny()) + Processor = "gfx905"; + } else if (Processor == "gfx906") { + if (isXnackOnOrAny()) + Processor = "gfx907"; + } else { + report_fatal_error( + "AMD GPU code object V2 does not support processor " + Processor); + } + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + // xnack. + if (isXnackOnOrAny()) + Features += "+xnack"; + // In code object v2 and v3, "sramecc" feature was spelled with a + // hyphen ("sram-ecc"). + if (isSramEccOnOrAny()) + Features += "+sram-ecc"; + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + // sramecc. + if (getSramEccSetting() == TargetIDSetting::Off) + Features += ":sramecc-"; + else if (getSramEccSetting() == TargetIDSetting::On) + Features += ":sramecc+"; + // xnack. 
+ if (getXnackSetting() == TargetIDSetting::Off) + Features += ":xnack-"; + else if (getXnackSetting() == TargetIDSetting::On) + Features += ":xnack+"; + break; + default: + break; + } + } - if (hasXNACK(*STI)) - Stream << "+xnack"; - if (hasSRAMECC(*STI)) - Stream << "+sramecc"; + StreamRep << Processor << Features; - Stream.flush(); + StreamRep.flush(); + return StringRep; } unsigned getWavefrontSize(const MCSubtargetInfo *STI) { Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -95,7 +95,7 @@ const char *getVendor() const; // Get .note record type of metadata blob to be emitted: - // ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or + // ELF::NT_AMD_PAL_METADATA (legacy key=val format), or // ELF::NT_AMDGPU_METADATA (MsgPack format), or // 0 (no PAL metadata). unsigned getType() const; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -41,7 +41,7 @@ } return; } - BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA; + BlobType = ELF::NT_AMD_PAL_METADATA; NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); if (!NamedMD || !NamedMD->getNumOperands()) { // Emit msgpack metadata by default @@ -69,7 +69,7 @@ // Metadata. bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) { BlobType = Type; - if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA) + if (Type == ELF::NT_AMD_PAL_METADATA) return setFromLegacyBlob(Blob); return setFromMsgPackBlob(Blob); } @@ -653,7 +653,7 @@ // a .note record of the specified AMD type. 
Returns an empty blob if // there is no PAL metadata, void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) { - if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA) + if (Type == ELF::NT_AMD_PAL_METADATA) toLegacyBlob(Blob); else if (Type) toMsgPackBlob(Blob); @@ -790,7 +790,7 @@ } // Get .note record type of metadata blob to be emitted: -// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or +// ELF::NT_AMD_PAL_METADATA (legacy key=val format), or // ELF::NT_AMDGPU_METADATA (MsgPack format), or // 0 (no PAL metadata). unsigned AMDGPUPALMetadata::getType() const { @@ -799,12 +799,12 @@ // Return whether the blob type is legacy PAL metadata. bool AMDGPUPALMetadata::isLegacy() const { - return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA; + return BlobType == ELF::NT_AMD_PAL_METADATA; } // Set legacy PAL metadata format. void AMDGPUPALMetadata::setLegacy() { - BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA; + BlobType = ELF::NT_AMD_PAL_METADATA; } // Erase all PAL metadata. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -24,7 +24,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] ; GFX9-NEXT: s_trap 2 ; GFX9-NEXT: ds_write_b32 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -47,7 +46,6 @@ ; GFX9-LABEL: func_use_lds_global_constexpr_cast: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] ; GFX9-NEXT: s_trap 2 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) Index: llvm/test/CodeGen/AMDGPU/and.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/and.ll +++ llvm/test/CodeGen/AMDGPU/and.ll @@ -1,5 +1,5 @@ ; 
RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #0 Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=CHECK %s ; 
RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: @@ -129,7 +129,7 @@ } attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} -; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; HSAMD: Version: [ 1, 0 ] ; HSAMD: Kernels: ; HSAMD: - Name: min_64_max_64 Index: llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir +++ llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir @@ -1,5 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s -# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s --- # Trivial clause at beginning of program Index: llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir +++ llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=tonga -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tonga -mattr=-xnack -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s # GCN: FLAT_LOAD_DWORD # GCN-NEXT: FLAT_LOAD_DWORD Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ 
-1,3 +1,90 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=fiji < %s | 
FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 < %s | FileCheck 
--check-prefixes=V3-GFX902-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 
-mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK 
%s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 
-mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX600 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX600 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=GFX601 %s @@ -17,8 +104,12 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX704 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=GFX704 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=GFX705 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s ; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=GFX802 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=GFX802 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=GFX802 %s @@ -29,23 +120,102 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=GFX805 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=GFX805 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=GFX810 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=GFX810 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX900-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX900-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=GFX902 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX902-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX902-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=GFX904 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck 
--check-prefixes=GFX904-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX904-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=GFX906 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX906-SRAMECC %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX906-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX906-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX908 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX908-SRAMECC %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX908-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX908-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-NOXNACK %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=GFX909 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX909-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX909-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=GFX1011 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1011-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1011-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX1012 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1012-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 
-mattr=+xnack < %s | FileCheck --check-prefixes=GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GFX1030 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX1031 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=GFX1032 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=GFX1033 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s - -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s - -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s - -; FIXME: With the default attributes these directives are not accurate for -; xnack and sramecc. Subsequent Target-ID patches will address this. 
+; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" +; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" +; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" +; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" +; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" +; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" +; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" +; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" +; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" +; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" +; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack" +; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" +; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" +; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" +; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" +; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" +; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" +; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" +; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" +; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" +; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" +; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" +; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" +; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack" +; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" +; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" +; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc" +; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack" +; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc" 
+; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" +; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack" +; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" +; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack" +; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" +; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" +; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" +; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" +; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" +; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" +; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" +; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" +; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" +; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" @@ -57,23 +227,60 @@ ; GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" ; GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" ; GFX801: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" +; GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack-" +; GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack+" ; GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" ; GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" ; GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" ; GFX810: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" +; GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack-" +; GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" ; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" +; GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" +; GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-" 
+; GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack+" ; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" +; GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack-" +; GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" ; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" - -; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" - -; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sramecc" -; SRAM-ECC-GFX906: "amdgcn-amd-amdhsa--gfx906+sramecc" - -; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sramecc" -; SRAM-ECC-XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sramecc" +; GFX906-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-" +; GFX906-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+" +; GFX906-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack-" +; GFX906-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack+" +; GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack-" +; GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-" +; GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+" +; GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+" +; GFX908: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" +; GFX908-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-" +; GFX908-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+" +; GFX908-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack-" +; GFX908-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack+" +; GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack-" +; GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-" +; GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack+" +; GFX908-SRAMECC-XNACK: .amdgcn_target 
"amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+" +; GFX909: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" +; GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack-" +; GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack+" +; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" +; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-" +; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+" +; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" +; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-" +; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+" +; GFX1011: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" +; GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack-" +; GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack+" +; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" +; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-" +; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+" +; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" +; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" +; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" +; GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" define amdgpu_kernel void @directive_amdgcn_target() { ret void Index: llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll +++ llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll @@ -1,28 +1,31 @@ -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s ; 
RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s ; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 -mattr=+sramecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s ; NO-SRAM-ECC-GFX906: Flags [ +; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) ; NO-SRAM-ECC-GFX906-NEXT: ] ; SRAM-ECC-GFX906: Flags [ +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) -; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) ; SRAM-ECC-GFX906-NEXT: ] ; SRAM-ECC-XNACK-GFX906: Flags [ +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) -; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200) -; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100) ; SRAM-ECC-XNACK-GFX906-NEXT: ] -; SRAM-ECC-GFX908: Flags [ (0x230) +; SRAM-ECC-GFX908: Flags [ +; SRAM-ECC-GFX908: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) ; SRAM-ECC-GFX908: EF_AMDGPU_MACH_AMDGCN_GFX908 (0x30) -; SRAM-ECC-GFX908: EF_AMDGPU_SRAM_ECC (0x200) ; SRAM-ECC-GFX908: ] define amdgpu_kernel void @elf_header() { Index: llvm/test/CodeGen/AMDGPU/elf-header-flags-xnack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-header-flags-xnack.ll +++ llvm/test/CodeGen/AMDGPU/elf-header-flags-xnack.ll @@ -1,14 +1,16 @@ -; RUN: 
llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX801 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX802 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s -; NO-XNACK-GFX801: Flags [ -; NO-XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28) -; NO-XNACK-GFX801-NEXT: ] +; XNACK-GFX801: Flags [ +; XNACK-GFX801-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) +; XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28) +; XNACK-GFX801-NEXT: ] -; XNACK-GFX802: Flags [ -; XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29) -; XNACK-GFX802-NEXT: EF_AMDGPU_XNACK (0x100) -; XNACK-GFX802-NEXT: ] +; NO-XNACK-GFX802: Flags [ +; NO-XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29) +; NO-XNACK-GFX802-NEXT: ] define amdgpu_kernel void @elf_header() { ret void Index: llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll +++ llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll @@ -13,11 +13,11 @@ ; NONE: OS/ABI: SystemV (0x0) ; HSA: OS/ABI: AMDGPU_HSA (0x40) -; HSA: ABIVersion: 1 +; HSA: ABIVersion: 2 ; PAL: OS/ABI: AMDGPU_PAL (0x41) ; PAL: ABIVersion: 0 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) -; MESA3D: ABIVersion: 0 +; MESA3D: ABIVersion: 0 define amdgpu_kernel void @elf_header() { 
ret void Index: llvm/test/CodeGen/AMDGPU/elf-notes.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-notes.ll +++ llvm/test/CodeGen/AMDGPU/elf-notes.ll @@ -16,13 +16,13 @@ ; OSABI-UNK-NOT: .amd_amdgpu_pal_metadata ; OSABI-UNK-ELF-NOT: Unknown note type -; OSABI-UNK-ELF: NT_AMD_AMDGPU_ISA (ISA Version) -; OSABI-UNK-ELF: ISA Version: +; OSABI-UNK-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +; OSABI-UNK-ELF: AMD HSA ISA Name: ; OSABI-UNK-ELF: amdgcn-amd-unknown--gfx802 ; OSABI-UNK-ELF-NOT: Unknown note type -; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-UNK-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; OSABI-UNK-ELF-NOT: Unknown note type -; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +; OSABI-UNK-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata) ; OSABI-UNK-ELF-NOT: Unknown note type ; OSABI-HSA: .hsa_code_object_version @@ -31,12 +31,12 @@ ; OSABI-HSA: .amd_amdgpu_hsa_metadata ; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata -; OSABI-HSA-ELF: Unknown note type: (0x00000001) -; OSABI-HSA-ELF: Unknown note type: (0x00000003) -; OSABI-HSA-ELF: NT_AMD_AMDGPU_ISA (ISA Version) -; OSABI-HSA-ELF: ISA Version: +; OSABI-HSA-ELF: NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version) +; OSABI-HSA-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version) +; OSABI-HSA-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +; OSABI-HSA-ELF: AMD HSA ISA Name: ; OSABI-HSA-ELF: amdgcn-amd-amdhsa--gfx802 -; OSABI-HSA-ELF: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-HSA-ELF: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; OSABI-HSA-ELF: HSA Metadata: ; OSABI-HSA-ELF: --- ; OSABI-HSA-ELF: Version: [ 1, 0 ] @@ -51,18 +51,18 @@ ; OSABI-HSA-ELF: WavefrontSize: 64 ; OSABI-HSA-ELF: NumSGPRs: 96 ; OSABI-HSA-ELF: ... 
-; OSABI-HSA-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +; OSABI-HSA-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata) ; OSABI-PAL-NOT: .hsa_code_object_version ; OSABI-PAL: .hsa_code_object_isa ; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" ; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata -; OSABI-PAL-ELF: Unknown note type: (0x00000003) -; OSABI-PAL-ELF: NT_AMD_AMDGPU_ISA (ISA Version) -; OSABI-PAL-ELF: ISA Version: +; OSABI-PAL-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version) +; OSABI-PAL-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +; OSABI-PAL-ELF: AMD HSA ISA Name: ; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802 -; OSABI-PAL-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-PAL-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; OSABI-PAL-ELF: NT_AMDGPU_METADATA (AMDGPU Metadata) ; OSABI-PAL-ELF: AMDGPU Metadata: ; OSABI-PAL-ELF: amdpal.pipelines: Index: llvm/test/CodeGen/AMDGPU/fabs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fabs.ll +++ llvm/test/CodeGen/AMDGPU/fabs.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s Index: llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,20 +1,27 @@ ; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s ; RUN: llc -march=amdgcn 
-mcpu=fiji -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s ; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-CI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-NOXNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-XNACK -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s + +; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s ; GCN-LABEL: {{^}}no_vcc_no_flat: -; HSA-CI: is_xnack_enabled = 0 -; HSA-VI-NOXNACK: is_xnack_enabled = 0 -; HSA-VI-XNACK: is_xnack_enabled = 1 + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 ; CI: ; NumSgprs: 8 ; VI-NOXNACK: ; NumSgprs: 8 @@ -26,9 +33,13 @@ } ; GCN-LABEL: {{^}}vcc_no_flat: -; HSA-CI: is_xnack_enabled = 0 -; HSA-VI-NOXNACK: is_xnack_enabled = 0 -; HSA-VI-XNACK: is_xnack_enabled = 1 + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 ; CI: ; NumSgprs: 10 ; VI-NOXNACK: ; NumSgprs: 10 @@ -40,16 +51,17 @@ } ; GCN-LABEL: {{^}}no_vcc_flat: -; HSA-CI: is_xnack_enabled = 0 -; HSA-VI-NOXNACK: is_xnack_enabled = 0 -; HSA-VI-XNACK: is_xnack_enabled = 1 + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 ; CI: ; NumSgprs: 12 ; VI-NOXNACK: ; NumSgprs: 14 ; VI-XNACK: ; NumSgprs: 14 -; HSA-CI: ; NumSgprs: 12 -; HSA-VI-NOXNACK: ; NumSgprs: 14 -; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @no_vcc_flat() { entry: call void asm sideeffect "", "~{s7},~{flat_scratch}"() @@ -57,15 +69,17 @@ } ; GCN-LABEL: {{^}}vcc_flat: -; HSA-NOXNACK: is_xnack_enabled = 0 -; HSA-XNACK: is_xnack_enabled = 1 + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; 
NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 ; CI: ; NumSgprs: 12 ; VI-NOXNACK: ; NumSgprs: 14 ; VI-XNACK: ; NumSgprs: 14 -; HSA-CI: ; NumSgprs: 12 -; HSA-VI-NOXNACK: ; NumSgprs: 14 -; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @vcc_flat() { entry: call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"() @@ -76,6 +90,14 @@ ; scratch usage and implicit flat uses. ; GCN-LABEL: {{^}}use_flat_scr: + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 + ; CI: NumSgprs: 4 ; VI-NOXNACK: NumSgprs: 6 ; VI-XNACK: NumSgprs: 6 @@ -86,6 +108,14 @@ } ; GCN-LABEL: {{^}}use_flat_scr_lo: + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 + ; CI: NumSgprs: 4 ; VI-NOXNACK: NumSgprs: 6 ; VI-XNACK: NumSgprs: 6 @@ -96,6 +126,14 @@ } ; GCN-LABEL: {{^}}use_flat_scr_hi: + +; HSA-CI-V2: is_xnack_enabled = 0 +; HSA-VI-XNACK-V2: is_xnack_enabled = 1 + +; NOT-HSA-CI: .amdhsa_reserve_xnack_mask +; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 +; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 + ; CI: NumSgprs: 4 ; VI-NOXNACK: NumSgprs: 6 ; VI-XNACK: NumSgprs: 6 Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata 
-filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: amdhsa.kernels: Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck 
--check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf 
--notes - | FileCheck --check-prefix=CHECK %s ; CHECK: --- ; CHECK: amdhsa.kernels: Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: amdhsa.kernels: Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: amdhsa.kernels: 
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. 
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. 
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll @@ -1,13 +1,10 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s -; GCN: --- -; GCN: Kernels: -; GCN: - Name: wavefrontsize -; GCN: CodeProps: -; GFX10-32: WavefrontSize: 32 -; GFX10-64: WavefrontSize: 64 -; GCN: ... 
+; GCN: amdhsa.kernels: +; GCN: .name: wavefrontsize +; GFX10-32: .wavefront_size: 32 +; GFX10-64: .wavefront_size: 64 define amdgpu_kernel void @wavefrontsize() { entry: ret void Index: llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,60 +1,59 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 
%s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa 
-mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx908 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX908 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1030 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1030 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1031 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1031 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1032 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1032 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1033 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1033 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri 
--amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < 
%s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | 
FileCheck --check-prefixes=HSA,HSA-GFX906 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s ; HSA: .hsa_code_object_version 2,1 -; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" -; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU" +; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600" +; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601" +; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602" ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU" ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" ; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" ; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU" +; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU" ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" ; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" +; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU" ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" +; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" ; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" ; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU" +; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU" ; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU" -; HSA-GFX908: .hsa_code_object_isa 9,0,8,"AMD","AMDGPU" -; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU" -; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU" -; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU" -; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU" -; HSA-GFX1030: .hsa_code_object_isa 10,3,0,"AMD","AMDGPU" -; HSA-GFX1031: .hsa_code_object_isa 10,3,1,"AMD","AMDGPU" -; HSA-GFX1032: .hsa_code_object_isa 10,3,2,"AMD","AMDGPU" -; 
HSA-GFX1033: .hsa_code_object_isa 10,3,3,"AMD","AMDGPU" +; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU" Index: llvm/test/CodeGen/AMDGPU/hsa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa.ll +++ llvm/test/CodeGen/AMDGPU/hsa.ll @@ -4,8 +4,8 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W64 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. 
@@ -49,12 +49,10 @@ ; HSA: enable_sgpr_kernarg_segment_ptr = 1 ; PRE-GFX10: enable_wavefront_size32 = 0 -; GFX10-W32: enable_wavefront_size32 = 1 -; GFX10-W64: enable_wavefront_size32 = 0 +; GFX10-W32: .amdhsa_wavefront_size32 1 +; GFX10-W64: .amdhsa_wavefront_size32 0 ; PRE-GFX10: wavefront_size = 6 -; GFX10-W32: wavefront_size = 5 -; GFX10-W64: wavefront_size = 6 ; HSA: call_convention = -1 ; HSA: .end_amd_kernel_code_t @@ -66,7 +64,7 @@ ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000 ; Make sure we generate flat store for HSA ; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} -; GFX10: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +; GFX10: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple Index: llvm/test/CodeGen/AMDGPU/kernarg-size.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s + +declare void @llvm.trap() #0 +declare void @llvm.debugtrap() #1 + +; HSA: .amdhsa_kernel trap +; HSA-NEXT: .amdhsa_group_segment_fixed_size 0 +; HSA-NEXT: .amdhsa_private_segment_fixed_size 0 +; HSA-NEXT: .amdhsa_kernarg_size 8 +; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .end_amdhsa_kernel + +define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 + call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 + ret void +} Index: llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s ; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s ; FIXME: align on alloca seems to be ignored for private_segment_alignment @@ -19,30 +19,6 @@ ; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000 -; GCNHSA: .amd_kernel_code_t - -; GCNHSA: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCNHSA: user_sgpr_count = 8 -; GCNHSA: enable_sgpr_workgroup_id_x = 1 -; GCNHSA: enable_sgpr_workgroup_id_y = 0 -; GCNHSA: enable_sgpr_workgroup_id_z = 0 -; GCNHSA: enable_sgpr_workgroup_info = 0 
-; GCNHSA: enable_vgpr_workitem_id = 0 - -; GCNHSA: enable_sgpr_private_segment_buffer = 1 -; GCNHSA: enable_sgpr_dispatch_ptr = 0 -; GCNHSA: enable_sgpr_queue_ptr = 0 -; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1 -; GCNHSA: enable_sgpr_dispatch_id = 0 -; GCNHSA: enable_sgpr_flat_scratch_init = 1 -; GCNHSA: enable_sgpr_private_segment_size = 0 -; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0 -; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0 -; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0 -; GCNHSA: workitem_private_segment_byte_size = 32772 -; GCNHSA: private_segment_alignment = 4 -; GCNHSA: .end_amd_kernel_code_t - ; GFX10HSA: s_add_u32 [[FLAT_SCR_LO:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} ; GFX10HSA-DAG: s_addc_u32 [[FLAT_SCR_HI:s[0-9]+]], s{{[0-9]+}}, 0 ; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), [[FLAT_SCR_LO]] @@ -51,6 +27,39 @@ ; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen ; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen +; GCNHSA: .amdhsa_kernel large_alloca_compute_shader +; GCNHSA: .amdhsa_group_segment_fixed_size 0 +; GCNHSA: .amdhsa_private_segment_fixed_size 32772 +; GCNHSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; GCNHSA: .amdhsa_user_sgpr_dispatch_ptr 0 +; GCNHSA: .amdhsa_user_sgpr_queue_ptr 0 +; GCNHSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +; GCNHSA: .amdhsa_user_sgpr_dispatch_id 0 +; GCNHSA: .amdhsa_user_sgpr_flat_scratch_init 1 +; GCNHSA: .amdhsa_user_sgpr_private_segment_size 0 +; GCNHSA: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +; GCNHSA: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCNHSA: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCNHSA: .amdhsa_system_sgpr_workgroup_id_z 0 +; GCNHSA: .amdhsa_system_sgpr_workgroup_info 0 +; GCNHSA: .amdhsa_system_vgpr_workitem_id 0 +; GCNHSA: .amdhsa_next_free_vgpr 3 +; GCNHSA: .amdhsa_next_free_sgpr 10 +; GCNHSA: .amdhsa_float_round_mode_32 0 +; GCNHSA: .amdhsa_float_round_mode_16_64 0 +; GCNHSA: .amdhsa_float_denorm_mode_32 3 +; GCNHSA: 
.amdhsa_float_denorm_mode_16_64 3 +; GCNHSA: .amdhsa_dx10_clamp 1 +; GCNHSA: .amdhsa_ieee_mode 1 +; GCNHSA: .amdhsa_exception_fp_ieee_invalid_op 0 +; GCNHSA: .amdhsa_exception_fp_denorm_src 0 +; GCNHSA: .amdhsa_exception_fp_ieee_div_zero 0 +; GCNHSA: .amdhsa_exception_fp_ieee_overflow 0 +; GCNHSA: .amdhsa_exception_fp_ieee_underflow 0 +; GCNHSA: .amdhsa_exception_fp_ieee_inexact 0 +; GCNHSA: .amdhsa_exception_int_div_zero 0 +; GCNHSA: .end_amdhsa_kernel + ; Scratch size = alloca size + emergency stack slot, align {{.*}}, addrspace(5) ; ALL: ; ScratchSize: 32772 define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { Index: llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GFX8 %s ; RUN: FileCheck -check-prefix=ERR %s < %t -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GFX9 %s ; RUN: FileCheck -check-prefix=ERR %s < %t @lds = internal addrspace(3) global float undef, align 4 @@ -25,7 +25,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: ds_write_b32 v0, v0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] ; GFX9-NEXT: s_trap 2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -35,12 +34,18 @@ ; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function define void @func_use_lds_global_constexpr_cast() 
{ -; GCN-LABEL: func_use_lds_global_constexpr_cast: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b64 s[0:1], s[4:5] -; GCN-NEXT: s_trap 2 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: func_use_lds_global_constexpr_cast: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global_constexpr_cast: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: s_setpc_b64 s[30:31] store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 ret void } Index: llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -2,8 +2,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 { ; GFX9-LABEL: s_lshr_v2i16: Index: llvm/test/CodeGen/AMDGPU/s_addk_i32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/s_addk_i32.ll +++ llvm/test/CodeGen/AMDGPU/s_addk_i32.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc 
-mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; TODO: Some of those tests fail with OS == amdhsa due to unreasonable register ; allocation differences. Index: llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll +++ llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_mulk_i32_k0: ; SI: s_load_dword [[VAL:s[0-9]+]] Index: llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll +++ llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll @@ -1,11 +1,13 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s +; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s ; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,ECC %s ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | 
FileCheck -check-prefixes=GCN,NO-ECC %s ; Make sure the correct set of targets are marked with -; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never +; FeatureDoesNotSupportSRAMECC, and +sramecc is ignored if it's never ; supported. ; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg: Index: llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s ; Make sure the stack is never realigned for entry functions. 
@@ -20,6 +20,7 @@ ; VI-NEXT: .amdhsa_kernel max_alignment_128 ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 256 +; VI-NEXT: .amdhsa_kernarg_size 0 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -67,6 +68,7 @@ ; GFX9-NEXT: .amdhsa_kernel max_alignment_128 ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256 +; GFX9-NEXT: .amdhsa_kernarg_size 0 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -83,6 +85,7 @@ ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 ; GFX9-NEXT: .amdhsa_reserve_vcc 0 +; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 @@ -121,6 +124,7 @@ ; VI-NEXT: .amdhsa_kernel stackrealign_attr ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 8 +; VI-NEXT: .amdhsa_kernarg_size 0 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -168,6 +172,7 @@ ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8 +; GFX9-NEXT: .amdhsa_kernarg_size 0 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -184,6 +189,7 @@ ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 ; GFX9-NEXT: .amdhsa_reserve_vcc 0 +; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 @@ -222,6 +228,7 @@ ; VI-NEXT: 
.amdhsa_kernel alignstack_attr ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 128 +; VI-NEXT: .amdhsa_kernarg_size 0 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -269,6 +276,7 @@ ; GFX9-NEXT: .amdhsa_kernel alignstack_attr ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128 +; GFX9-NEXT: .amdhsa_kernarg_size 0 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -285,6 +293,7 @@ ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 ; GFX9-NEXT: .amdhsa_reserve_vcc 0 +; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x12C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} + +define void @func1() { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} Index: 
llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" +; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) +; ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} + +define void @func1() { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +define void @func1() #0 { +entry: + ret void +} + +define void @func2() #0 { +entry: + ret void +} 
+ +attributes #0 = { "target-features"="-xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x32C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +define void @func1() #0 { +entry: + ret void +} + +define void @func2() #0 { +entry: + ret void +} + +attributes #0 = { "target-features"="+xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; 
ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} + +define void @func1() #0 { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} + +attributes #0 = { "target-features"="-xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +define void @func1() { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} + +attributes #0 = { "target-features"="-xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: 
AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x32C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} + +define void @func1() #0 { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} + +attributes #0 = { "target-features"="+xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x32C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +define void @func1() { +entry: + ret void +} + +define void @func2() { +entry: + ret void +} + +attributes #0 = { "target-features"="+xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s + +; ERR: error: xnack setting of 'func2' function does not match module xnack setting + +define void @func0() { +entry: + ret void +} + +define void @func1() #0 { +entry: + 
ret void +} + +define void @func2() #1 { +entry: + ret void +} + +attributes #0 = { "target-features"="-xnack" } +attributes #1 = { "target-features"="+xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x12C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" +; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) +; ELF-NEXT: ] + +define void @func0() { +entry: + ret void +} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x22C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +attributes #0 = { "target-features"="-xnack" } Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s + +; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: amdhsa.version: +; ASM: - 1 +; ASM: - 1 + +; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: ABIVersion: 2 +; ELF: Flags [ (0x32C) +; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) +; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) +; ELF-NEXT: ] + +define void @func0() #0 { +entry: + ret void +} + +attributes #0 = { "target-features"="+xnack" } Index: llvm/test/CodeGen/AMDGPU/trap-abis.ll =================================================================== --- 
/dev/null +++ llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -0,0 +1,1181 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler 
--amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s + +declare void @llvm.trap() #0 +declare void @llvm.debugtrap() #1 + +define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { +; NOHSA-TRAP-GFX900-V2-LABEL: trap: +; NOHSA-TRAP-GFX900-V2: ; %bb.0: +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V3-LABEL: trap: +; NOHSA-TRAP-GFX900-V3: ; %bb.0: +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V4-LABEL: trap: +; NOHSA-TRAP-GFX900-V4: ; %bb.0: +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-V2-LABEL: trap: +; HSA-TRAP-GFX803-V2: .amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 
+; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: priority = 0 +; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 +; HSA-TRAP-GFX803-V2-NEXT: priv = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 +; 
HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8 +; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6 +; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 +; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: +; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s2 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s3 +; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: s_trap 2 +; +; HSA-TRAP-GFX803-V3-LABEL: trap: +; HSA-TRAP-GFX803-V3: ; %bb.0: +; 
HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s2 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s3 +; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 +; +; HSA-TRAP-GFX803-V4-LABEL: trap: +; HSA-TRAP-GFX803-V4: ; %bb.0: +; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s2 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s3 +; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V2-LABEL: trap: +; HSA-TRAP-GFX900-V2: .amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: priority = 0 +; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240 +; HSA-TRAP-GFX900-V2-NEXT: priv = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered 
= 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: 
wavefront_sgpr_count = 8 +; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1 +; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: +; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[2:3] +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V3-LABEL: trap: +; HSA-TRAP-GFX900-V3: ; %bb.0: +; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3] +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V4-LABEL: trap: +; HSA-TRAP-GFX900-V4: ; %bb.0: +; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt 
lgkmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-V2-LABEL: trap: +; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 +; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 
+; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-NOTRAP-GFX900-V2-NEXT: 
call_convention = -1 +; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: +; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V3-LABEL: trap: +; HSA-NOTRAP-GFX900-V3: ; %bb.0: +; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V4-LABEL: trap: +; HSA-NOTRAP-GFX900-V4: ; %bb.0: +; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm + store volatile i32 1, i32 addrspace(1)* %arg0 + call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 + ret void +} + +define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { +; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap: +; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, 
s[0:1] glc +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; NOHSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc +; NOHSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2 +; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 +; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-V2-NEXT: BB1_2: ; %trap +; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap: +; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; NOHSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc +; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2 +; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 +; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-V3-NEXT: BB1_2: ; %trap +; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap: +; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; NOHSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc +; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2 +; 
NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 +; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-V4-NEXT: BB1_2: ; %trap +; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-V2-LABEL: non_entry_trap: +; HSA-TRAP-GFX803-V2: .amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 +; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 1 +; HSA-TRAP-GFX803-V2-NEXT: priority = 0 +; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 +; HSA-TRAP-GFX803-V2-NEXT: priv = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 
+; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 10 +; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: 
wavefront_size = 6 +; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 +; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: ; %entry +; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V2-NEXT: flat_load_dword v0, v[0:1] glc +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 +; HSA-TRAP-GFX803-V2-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX803-V2-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX803-V2-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 3 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: s_endpgm +; HSA-TRAP-GFX803-V2-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V2-NEXT: s_trap 2 +; +; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap: +; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry +; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 +; HSA-TRAP-GFX803-V3-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 3 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 +; 
HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: s_endpgm +; HSA-TRAP-GFX803-V3-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 +; +; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap: +; HSA-TRAP-GFX803-V4: ; %bb.0: ; %entry +; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 +; HSA-TRAP-GFX803-V4-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 3 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: s_endpgm +; HSA-TRAP-GFX803-V4-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V2-LABEL: non_entry_trap: +; HSA-TRAP-GFX900-V2: .amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 +; HSA-TRAP-GFX900-V2-NEXT: priority = 0 +; HSA-TRAP-GFX900-V2-NEXT: 
float_mode = 240 +; HSA-TRAP-GFX900-V2-NEXT: priv = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-TRAP-GFX900-V2-NEXT: 
workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10 +; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1 +; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry +; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-TRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: s_endpgm +; HSA-TRAP-GFX900-V2-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX900-V2-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V3-LABEL: 
non_entry_trap: +; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry +; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-TRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: s_endpgm +; HSA-TRAP-GFX900-V3-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap: +; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry +; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-TRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc +; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2 +; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: s_endpgm +; HSA-TRAP-GFX900-V4-NEXT: BB1_2: ; %trap +; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-V2-LABEL: non_entry_trap: +; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: 
amd_machine_version_major = 9 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 +; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 
0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1 +; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry +; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc +; 
HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-NOTRAP-GFX900-V2-NEXT: s_and_b64 vcc, exec, vcc +; HSA-NOTRAP-GFX900-V2-NEXT: s_cbranch_vccz BB1_2 +; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 +; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm +; HSA-NOTRAP-GFX900-V2-NEXT: BB1_2: ; %trap +; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap: +; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry +; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-NOTRAP-GFX900-V3-NEXT: s_and_b64 vcc, exec, vcc +; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz BB1_2 +; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 +; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm +; HSA-NOTRAP-GFX900-V3-NEXT: BB1_2: ; %trap +; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap: +; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry +; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-NOTRAP-GFX900-V4-NEXT: s_and_b64 vcc, exec, vcc +; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz BB1_2 +; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; 
%ret +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 +; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; HSA-NOTRAP-GFX900-V4-NEXT: BB1_2: ; %trap +; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +entry: + %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %cmp = icmp eq i32 %tmp29, -1 + br i1 %cmp, label %ret, label %trap + +trap: + call void @llvm.trap() + unreachable + +ret: + store volatile i32 3, i32 addrspace(1)* %arg0 + ret void +} + +define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { +; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap: +; NOHSA-TRAP-GFX900-V2: ; %bb.0: +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] +; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap: +; NOHSA-TRAP-GFX900-V3: ; %bb.0: +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] +; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm +; +; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap: +; NOHSA-TRAP-GFX900-V4: ; %bb.0: +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; 
NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] +; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-V2-LABEL: debugtrap: +; HSA-TRAP-GFX803-V2: .amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 +; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: priority = 0 +; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 +; HSA-TRAP-GFX803-V2-NEXT: priv = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX803-V2-NEXT: 
enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 +; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8 +; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 4 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 +; 
HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6 +; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 +; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: +; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v3, 2 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: s_trap 3 +; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v3 +; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V2-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-V3-LABEL: debugtrap: +; HSA-TRAP-GFX803-V3: ; %bb.0: +; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2 +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: s_trap 3 +; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3 +; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V3-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-V4-LABEL: debugtrap: +; HSA-TRAP-GFX803-V4: ; %bb.0: +; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 +; 
HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: s_trap 3 +; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v3 +; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-V4-NEXT: s_endpgm +; +; HSA-TRAP-GFX900-V2-LABEL: debugtrap: +; HSA-TRAP-GFX900-V2: .amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: priority = 0 +; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240 +; HSA-TRAP-GFX900-V2-NEXT: priv = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 +; 
HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8 +; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-TRAP-GFX900-V2-NEXT: 
call_convention = -1 +; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: +; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: s_trap 3 +; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V2-NEXT: s_endpgm +; +; HSA-TRAP-GFX900-V3-LABEL: debugtrap: +; HSA-TRAP-GFX900-V3: ; %bb.0: +; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: s_trap 3 +; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V3-NEXT: s_endpgm +; +; HSA-TRAP-GFX900-V4-LABEL: debugtrap: +; HSA-TRAP-GFX900-V4: ; %bb.0: +; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: s_trap 3 +; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-V4-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V2-LABEL: 
debugtrap: +; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 +; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 +; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; 
HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8 +; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 +; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 +; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1 +; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 +; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t +; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: +; 
HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap: +; HSA-NOTRAP-GFX900-V3: ; %bb.0: +; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap: +; HSA-NOTRAP-GFX900-V4: ; %bb.0: +; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm + store volatile i32 1, i32 addrspace(1)* %arg0 + call void @llvm.debugtrap() + store volatile i32 2, i32 addrspace(1)* %arg0 + ret void +} + +attributes #0 = { nounwind noreturn } +attributes #1 = { nounwind } Index: llvm/test/MC/AMDGPU/hsa-diag-v3.s 
=================================================================== --- llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ llvm/test/MC/AMDGPU/hsa-diag-v3.s @@ -1,16 +1,16 @@ -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA -// RUN: not llvm-mc -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,NONGFX10,AMDHSA,ALL .text // GCN-LABEL: warning: test_target // GFX8-NOT: error: -// GFX10: error: target must match options -// NONAMDHSA: error: unknown directive +// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack +// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810 .warning "test_target" -.amdgcn_target "amdgcn-amd-amdhsa--gfx803+xnack" +.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" // GCN-LABEL: 
warning: test_amdhsa_kernel_no_name // GCN: error: unknown directive Index: llvm/test/MC/AMDGPU/hsa-gfx10-v3.s =================================================================== --- llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t // RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s @@ -28,7 +28,7 @@ // OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 // complete -// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 +// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 @@ -80,6 +80,7 @@ .amdhsa_kernel complete .amdhsa_group_segment_fixed_size 1 .amdhsa_private_segment_fixed_size 1 + .amdhsa_kernarg_size 8 .amdhsa_user_sgpr_private_segment_buffer 1 .amdhsa_user_sgpr_dispatch_ptr 1 .amdhsa_user_sgpr_queue_ptr 1 @@ -98,7 +99,7 @@ .amdhsa_next_free_sgpr 27 .amdhsa_reserve_vcc 0 .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_xnack_mask 1 .amdhsa_float_round_mode_32 1 .amdhsa_float_round_mode_16_64 1 .amdhsa_float_denorm_mode_32 1 @@ -121,6 +122,7 @@ // ASM: .amdhsa_kernel complete // ASM-NEXT: .amdhsa_group_segment_fixed_size 1 // ASM-NEXT: 
.amdhsa_private_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_kernarg_size 8 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 @@ -139,7 +141,7 @@ // ASM-NEXT: .amdhsa_next_free_sgpr 27 // ASM-NEXT: .amdhsa_reserve_vcc 0 // ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 // ASM-NEXT: .amdhsa_float_round_mode_32 1 // ASM-NEXT: .amdhsa_float_round_mode_16_64 1 // ASM-NEXT: .amdhsa_float_denorm_mode_32 1 @@ -169,7 +171,7 @@ .amdhsa_reserve_flat_scratch 1 .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_xnack_mask 1 .amdhsa_float_denorm_mode_16_64 0 .amdhsa_dx10_clamp 0 @@ -181,7 +183,7 @@ // ASM: .amdhsa_next_free_vgpr 0 // ASM-NEXT: .amdhsa_next_free_sgpr 27 // ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 // ASM: .amdhsa_float_denorm_mode_16_64 0 // ASM-NEXT: .amdhsa_dx10_clamp 0 // ASM-NEXT: .amdhsa_ieee_mode 0 Index: llvm/test/MC/AMDGPU/hsa-v3.s =================================================================== --- llvm/test/MC/AMDGPU/hsa-v3.s +++ llvm/test/MC/AMDGPU/hsa-v3.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t // RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s @@ -31,7 +31,7 @@ // OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 // 
OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 // complete -// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 +// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 @@ -93,6 +93,7 @@ .amdhsa_kernel complete .amdhsa_group_segment_fixed_size 1 .amdhsa_private_segment_fixed_size 1 + .amdhsa_kernarg_size 8 .amdhsa_user_sgpr_private_segment_buffer 1 .amdhsa_user_sgpr_dispatch_ptr 1 .amdhsa_user_sgpr_queue_ptr 1 @@ -110,7 +111,7 @@ .amdhsa_next_free_sgpr 27 .amdhsa_reserve_vcc 0 .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_xnack_mask 1 .amdhsa_float_round_mode_32 1 .amdhsa_float_round_mode_16_64 1 .amdhsa_float_denorm_mode_32 1 @@ -130,6 +131,7 @@ // ASM: .amdhsa_kernel complete // ASM-NEXT: .amdhsa_group_segment_fixed_size 1 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_kernarg_size 8 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 @@ -147,7 +149,7 @@ // ASM-NEXT: .amdhsa_next_free_sgpr 27 // ASM-NEXT: .amdhsa_reserve_vcc 0 // ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 // ASM-NEXT: .amdhsa_float_round_mode_32 1 // ASM-NEXT: .amdhsa_float_round_mode_16_64 1 // ASM-NEXT: .amdhsa_float_denorm_mode_32 1 @@ -174,7 +176,7 @@ .amdhsa_reserve_flat_scratch 1 .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_xnack_mask 1 .amdhsa_float_denorm_mode_16_64 0 .amdhsa_dx10_clamp 0 @@ -186,7 +188,7 @@ // ASM: .amdhsa_next_free_vgpr 0 // ASM-NEXT: .amdhsa_next_free_sgpr 27 // ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 // ASM: .amdhsa_float_denorm_mode_16_64 0 // 
ASM-NEXT: .amdhsa_dx10_clamp 0 // ASM-NEXT: .amdhsa_ieee_mode 0 Index: llvm/test/MC/AMDGPU/hsa-v4.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/hsa-v4.s @@ -0,0 +1,303 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// READOBJ: Section Headers +// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 +// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 + +// READOBJ: Relocation section '.rela.rodata' at offset +// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 +// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 +// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 +// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310 + +// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: +// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal +// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete +// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr +// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr +// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd +// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd +// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd +// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 
disabled_user_sgpr.kd + +// OBJDUMP: Contents of section .rodata +// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. +// minimal +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 +// complete +// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// special_sgpr +// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000 +// disabled_user_sgpr +// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" + +.p2align 8 +.type minimal,@function +minimal: + s_endpgm + +.p2align 8 +.type complete,@function +complete: + s_endpgm + +.p2align 8 +.type special_sgpr,@function +special_sgpr: + s_endpgm + +.p2align 8 +.type disabled_user_sgpr,@function +disabled_user_sgpr: + s_endpgm + +.rodata +// ASM: .rodata + +// Test that only specifying required directives is allowed, and that defaulted +// values are omitted. 
+.p2align 6 +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel minimal +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .end_amdhsa_kernel + +// Test that we can specify all available directives with non-default values. +.p2align 6 +.amdhsa_kernel complete + .amdhsa_group_segment_fixed_size 1 + .amdhsa_private_segment_fixed_size 1 + .amdhsa_kernarg_size 8 + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_dispatch_ptr 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_dispatch_id 1 + .amdhsa_user_sgpr_flat_scratch_init 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_system_sgpr_private_segment_wavefront_offset 1 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 1 + .amdhsa_system_sgpr_workgroup_id_z 1 + .amdhsa_system_sgpr_workgroup_info 1 + .amdhsa_system_vgpr_workitem_id 1 + .amdhsa_next_free_vgpr 9 + .amdhsa_next_free_sgpr 27 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_float_round_mode_32 1 + .amdhsa_float_round_mode_16_64 1 + .amdhsa_float_denorm_mode_32 1 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 1 + .amdhsa_exception_fp_ieee_invalid_op 1 + .amdhsa_exception_fp_denorm_src 1 + .amdhsa_exception_fp_ieee_div_zero 1 + .amdhsa_exception_fp_ieee_overflow 1 + .amdhsa_exception_fp_ieee_underflow 1 + .amdhsa_exception_fp_ieee_inexact 1 + .amdhsa_exception_int_div_zero 1 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel complete +// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_kernarg_size 8 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +// 
ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 +// ASM-NEXT: .amdhsa_next_free_vgpr 9 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 1 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel + +// Test that we are including special SGPR usage in the granulated count. +.p2align 6 +.amdhsa_kernel special_sgpr + // Same next_free_sgpr as "complete", but... 
+ .amdhsa_next_free_sgpr 27 + // ...on GFX9 this should require an additional 6 SGPRs, pushing us from + // 3 granules to 4 + .amdhsa_reserve_flat_scratch 1 + + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_xnack_mask 1 + + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_next_free_vgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel special_sgpr +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM: .end_amdhsa_kernel + +// Test that explicitly disabling user_sgpr's does not affect the user_sgpr +// count, i.e. this should produce the same descriptor as minimal. +.p2align 6 +.amdhsa_kernel disabled_user_sgpr + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel disabled_user_sgpr +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .end_amdhsa_kernel + +.section .foo + +.byte .amdgcn.gfx_generation_number +// ASM: .byte 9 + +.byte .amdgcn.gfx_generation_minor +// ASM: .byte 0 + +.byte .amdgcn.gfx_generation_stepping +// ASM: .byte 4 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v7, s10 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 8 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 11 + +.set .amdgcn.next_free_vgpr, 0 +.set .amdgcn.next_free_sgpr, 0 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v16, s3 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 17 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 4 + +// Metadata + +.amdgpu_metadata + amdhsa.version: + - 3 + - 0 + amdhsa.kernels: + - .name: amd_kernel_code_t_test_all + .symbol: amd_kernel_code_t_test_all@kd + 
.kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + - .name: amd_kernel_code_t_minimal + .symbol: amd_kernel_code_t_minimal@kd + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 +.end_amdgpu_metadata + +// ASM: .amdgpu_metadata +// ASM: amdhsa.kernels: +// ASM: - .group_segment_fixed_size: 16 +// ASM: .kernarg_segment_align: 64 +// ASM: .kernarg_segment_size: 8 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .name: amd_kernel_code_t_test_all +// ASM: .private_segment_fixed_size: 32 +// ASM: .sgpr_count: 14 +// ASM: .symbol: 'amd_kernel_code_t_test_all@kd' +// ASM: .vgpr_count: 40 +// ASM: .wavefront_size: 128 +// ASM: - .group_segment_fixed_size: 16 +// ASM: .kernarg_segment_align: 64 +// ASM: .kernarg_segment_size: 8 +// ASM: .max_flat_workgroup_size: 256 +// ASM: .name: amd_kernel_code_t_minimal +// ASM: .private_segment_fixed_size: 32 +// ASM: .sgpr_count: 14 +// ASM: .symbol: 'amd_kernel_code_t_minimal@kd' +// ASM: .vgpr_count: 40 +// ASM: .wavefront_size: 128 +// ASM: amdhsa.version: +// ASM-NEXT: - 3 +// ASM-NEXT: - 0 +// ASM: .end_amdgpu_metadata Index: llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s =================================================================== --- llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s @@ -1,8 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s +// RUN: llvm-mc 
-arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s .hsa_code_object_isa // GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" -// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU" Index: llvm/test/MC/AMDGPU/isa-version-hsa.s =================================================================== --- llvm/test/MC/AMDGPU/isa-version-hsa.s +++ llvm/test/MC/AMDGPU/isa-version-hsa.s @@ -7,7 +7,7 @@ // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802" -// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line +// OSABI-UNK-ERR: error: target id must match options +// OSABI-HSA-ERR: error: target id must match options +// OSABI-PAL-ERR: error: target id must match options .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802" Index: llvm/test/MC/AMDGPU/isa-version-pal.s =================================================================== --- llvm/test/MC/AMDGPU/isa-version-pal.s +++ llvm/test/MC/AMDGPU/isa-version-pal.s @@ -7,7 +7,7 @@ // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s // OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" -// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or 
mcpu arguments specified through the command line +// OSABI-UNK-ERR: error: target id must match options +// OSABI-HSA-ERR: error: target id must match options +// OSABI-PAL-ERR: error: target id must match options .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" Index: llvm/test/MC/AMDGPU/isa-version-unk.s =================================================================== --- llvm/test/MC/AMDGPU/isa-version-unk.s +++ llvm/test/MC/AMDGPU/isa-version-unk.s @@ -7,7 +7,7 @@ // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" -// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line -// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line +// OSABI-UNK-ERR: error: target id must match options +// OSABI-HSA-ERR: error: target id must match options +// OSABI-PAL-ERR: error: target id must match options .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" Index: llvm/test/MC/AMDGPU/round-trip.s =================================================================== --- llvm/test/MC/AMDGPU/round-trip.s +++ llvm/test/MC/AMDGPU/round-trip.s @@ -1,5 +1,5 @@ -# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %s >%t-1.s -# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %t-1.s >%t-2.s +# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %s >%t-1.s +# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %t-1.s >%t-2.s # RUN: diff %t-1.s %t-2.s # Test that AMDGPU assembly round-trips when run through MC; the first Index: llvm/test/Object/AMDGPU/elf-header-flags-sramecc.yaml =================================================================== --- 
llvm/test/Object/AMDGPU/elf-header-flags-sramecc.yaml +++ llvm/test/Object/AMDGPU/elf-header-flags-sramecc.yaml @@ -9,23 +9,23 @@ # RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s # ELF-SRAM-ECC-NONE: Flags [ -# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200) +# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) # ELF-SRAM-ECC-NONE-NEXT: ] # ELF-SRAM-ECC-GFX900: Flags [ +# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) # ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) -# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) # ELF-SRAM-ECC-GFX900-NEXT: ] # ELF-SRAM-ECC-XNACK-GFX900: Flags [ +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) # ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) -# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200) -# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100) # ELF-SRAM-ECC-XNACK-GFX900-NEXT: ] -# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ] -# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] -# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] +# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_SRAMECC_V3 ] +# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ] +# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ] # Doc1 --- !ELF @@ -35,7 +35,7 @@ OSABI: ELFOSABI_NONE Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_SRAM_ECC ] + Flags: [ EF_AMDGPU_FEATURE_SRAMECC_V3 ] ... # Doc2 @@ -46,7 +46,7 @@ OSABI: ELFOSABI_NONE Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ] + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ] ... 
# Doc3 @@ -57,5 +57,5 @@ OSABI: ELFOSABI_NONE Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ] + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ] ... Index: llvm/test/Object/AMDGPU/elf-header-flags-xnack.yaml =================================================================== --- llvm/test/Object/AMDGPU/elf-header-flags-xnack.yaml +++ llvm/test/Object/AMDGPU/elf-header-flags-xnack.yaml @@ -6,13 +6,13 @@ # RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-XNACK-GFX801 %s # ELF-ALL: Flags [ -# ELF-XNACK-NONE: EF_AMDGPU_XNACK (0x100) +# ELF-XNACK-NONE: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) +# ELF-XNACK-GFX801: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) # ELF-XNACK-GFX801: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28) -# ELF-XNACK-GFX801: EF_AMDGPU_XNACK (0x100) # ELF-ALL: ] -# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_XNACK ] -# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ] +# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_XNACK_V3 ] +# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ] # Doc1 --- !ELF @@ -22,7 +22,7 @@ OSABI: ELFOSABI_NONE Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_XNACK ] + Flags: [ EF_AMDGPU_FEATURE_XNACK_V3 ] ... # Doc2 @@ -33,5 +33,5 @@ OSABI: ELFOSABI_NONE Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ] + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ] ... Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s @@ -19,11 +19,12 @@ my_kernel.kd: .long 0x00000000 ;; group_segment_fixed_size .long 0x00000000 ;; private_segment_fixed_size - .quad 0x00FF000000000000 ;; reserved bytes. 
+ .long 0x00000000 ;; kernarg_segment_size. + .long 0x00000000 ;; reserved bytes. .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. ;; 20 reserved bytes. - .quad 0x0000000000000000 + .quad 0x00FF000000000000 ;; reserved bytes. .quad 0x0000000000000000 .long 0x00000000 Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -2,19 +2,19 @@ ; RUN: split-file %s %t.dir -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble ; RUN: diff %t1 %t1-re-assemble -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble ; RUN: diff %t2 %t2-re-assemble -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack 
-filetype=obj -o %t3-re-assemble ; RUN: diff %t3 %t3-re-assemble @@ -34,7 +34,7 @@ .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 0 .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_xnack_mask 0 .amdhsa_reserve_vcc 1 .end_amdhsa_kernel @@ -44,6 +44,6 @@ .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 35 .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_xnack_mask 0 .amdhsa_reserve_vcc 1 .end_amdhsa_kernel Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -2,19 +2,19 @@ ; RUN: split-file %s %t.dir -; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble ; RUN: diff %t1 %t1-re-assemble -; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble ; RUN: diff %t2 %t2-re-assemble -; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3 ; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n 
+8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble ; RUN: diff %t3 %t3-re-assemble ;--- 1.s Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -1,6 +1,6 @@ ;; Entirely zeroed kernel descriptor (for GFX10). -; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t ; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s ;; TODO: Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s @@ -1,8 +1,8 @@ ;; Entirely zeroed kernel descriptor (for GFX9). 
-; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 ; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 ; RUN: diff %t1 %t2 ; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s Index: llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s =================================================================== --- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s +++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s @@ -1,6 +1,6 @@ -; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1 ; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2 ; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s ;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). 
Index: llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test =================================================================== --- llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test +++ llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test @@ -1,127 +1,338 @@ -# RUN: yaml2obj %s -o %t -DCPU=GFX600 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX600 -DFLAGS=0x20 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20 -# RUN: yaml2obj %s -o %t -DCPU=GFX601 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX601 -DFLAGS=0x21 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20 -# RUN: yaml2obj %s -o %t -DCPU=GFX602 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX602 -DFLAGS=0x3A +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20 -# RUN: yaml2obj %s -o %t -DCPU=GFX700 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX700 -DFLAGS=0x22 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21 -# RUN: yaml2obj %s -o %t -DCPU=GFX701 -# RUN: llvm-readobj 
-h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX701 -DFLAGS=0x23 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21 -# RUN: yaml2obj %s -o %t -DCPU=GFX702 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX702 -DFLAGS=0x24 +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21 -# RUN: yaml2obj %s -o %t -DCPU=GFX703 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX703 -DFLAGS=0x25 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A -# RUN: yaml2obj %s -o %t -DCPU=GFX704 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX704 -DFLAGS=0x26 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A -# RUN: yaml2obj %s -o %t -DCPU=GFX705 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX705 -DFLAGS=0x3B +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t 
-DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A -# RUN: yaml2obj %s -o %t -DCPU=GFX801 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX801 -DFLAGS=0x28 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22 -# RUN: yaml2obj %s -o %t -DCPU=GFX802 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX802 -DFLAGS=0x29 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22 -# RUN: yaml2obj %s -o %t -DCPU=GFX803 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX803 -DFLAGS=0x2A +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22 -# RUN: yaml2obj %s -o %t -DCPU=GFX805 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX805 -DFLAGS=0x3C +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23 -# RUN: yaml2obj %s -o %t -DCPU=GFX810 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX810 -DFLAGS=0x2B +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23 -# RUN: yaml2obj %s -o %t -DCPU=GFX900 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX900 -DFLAGS=0x2C +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23 -# RUN: yaml2obj %s -o %t -DCPU=GFX902 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX902 -DFLAGS=0x2D +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24 -# RUN: yaml2obj %s -o %t -DCPU=GFX904 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX904 -DFLAGS=0x2E +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24 -# RUN: yaml2obj %s -o %t -DCPU=GFX906 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX906 -DFLAGS=0x2F +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24 -# RUN: yaml2obj %s -o %t -DCPU=GFX908 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX908 -DFLAGS=0x30 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 
-DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25 -# RUN: yaml2obj %s -o %t -DCPU=GFX909 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX909 -DFLAGS=0x31 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25 -# RUN: yaml2obj %s -o %t -DCPU=GFX90A -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90A -DFLAGS=0x3F +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25 -# RUN: yaml2obj %s -o %t -DCPU=GFX90C -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90C -DFLAGS=0x32 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26 -# RUN: yaml2obj %s -o %t -DCPU=GFX1010 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1010 -DFLAGS=0x33 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26 -# RUN: yaml2obj %s -o %t -DCPU=GFX1011 -# RUN: llvm-readobj -h %t | FileCheck %s 
--match-full-lines -DFILE=%t -DCPU=GFX1011 -DFLAGS=0x34 +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26 -# RUN: yaml2obj %s -o %t -DCPU=GFX1012 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1012 -DFLAGS=0x35 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B -# RUN: yaml2obj %s -o %t -DCPU=GFX1030 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1030 -DFLAGS=0x36 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B -# RUN: yaml2obj %s -o %t -DCPU=GFX1031 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1031 -DFLAGS=0x37 +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B -# RUN: yaml2obj %s -o %t -DCPU=GFX1032 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1032 -DFLAGS=0x38 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 
-DFLAG_VALUE=0x28 -# RUN: yaml2obj %s -o %t -DCPU=GFX1033 -# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1033 -DFLAGS=0x39 +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 +# RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t 
-DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ANY_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_OFF_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ON_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x33F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 (0x400)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x43F + +# RUN: yaml2obj %s 
-o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x83F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ON_V4" +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0xC3F + +# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F --- !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB OSABI: ELFOSABI_AMDGPU_HSA + ABIVersion: [[ABI_VERSION]] Type: ET_REL Machine: EM_AMDGPU - Flags: [ EF_AMDGPU_MACH_AMDGCN_[[CPU]] ] - -# CHECK: File: [[FILE]] -# CHECK-NEXT: Format: elf64-amdgpu -# CHECK-NEXT: Arch: amdgcn -# CHECK-NEXT: AddressSize: 64bit -# CHECK-NEXT: LoadName: -# CHECK-NEXT: ElfHeader { -# CHECK-NEXT: Ident { -# CHECK-NEXT: Magic: (7F 45 4C 46) -# CHECK-NEXT: Class: 64-bit (0x2) -# CHECK-NEXT: DataEncoding: LittleEndian (0x1) -# CHECK-NEXT: FileVersion: 1 -# CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 0 -# CHECK-NEXT: Unused: (00 00 00 00 00 00 00) -# CHECK-NEXT: } -# CHECK-NEXT: Type: Relocatable (0x1) -# CHECK-NEXT: Machine: EM_AMDGPU (0xE0) -# CHECK-NEXT: Version: 1 -# CHECK-NEXT: Entry: 0x0 -# CHECK-NEXT: ProgramHeaderOffset: 0x0 -# CHECK-NEXT: SectionHeaderOffset: 0x58 -# CHECK-NEXT: Flags [ ([[FLAGS]]) -# CHECK-NEXT: EF_AMDGPU_MACH_AMDGCN_[[CPU]] ([[FLAGS]]) -# CHECK-NEXT: ] -# CHECK-NEXT: HeaderSize: 64 -# 
CHECK-NEXT: ProgramHeaderEntrySize: 0 -# CHECK-NEXT: ProgramHeaderCount: 0 -# CHECK-NEXT: SectionHeaderEntrySize: 64 -# CHECK-NEXT: SectionHeaderCount: 3 -# CHECK-NEXT: StringTableSectionIndex: 2 -# CHECK-NEXT: } + Flags: [ [[FLAG_NAME]] ] + +# ALL: File: [[FILE]] +# ALL-NEXT: Format: elf64-amdgpu +# ALL-NEXT: Arch: amdgcn +# ALL-NEXT: AddressSize: 64bit +# ALL-NEXT: LoadName: +# ALL-NEXT: ElfHeader { +# ALL-NEXT: Ident { +# ALL-NEXT: Magic: (7F 45 4C 46) +# ALL-NEXT: Class: 64-bit (0x2) +# ALL-NEXT: DataEncoding: LittleEndian (0x1) +# ALL-NEXT: FileVersion: 1 +# ALL-NEXT: OS/ABI: AMDGPU_HSA (0x40) +# ALL-NEXT: ABIVersion: [[ABI_VERSION]] +# ALL-NEXT: Unused: (00 00 00 00 00 00 00) +# ALL-NEXT: } +# ALL-NEXT: Type: Relocatable (0x1) +# ALL-NEXT: Machine: EM_AMDGPU (0xE0) +# ALL-NEXT: Version: 1 +# ALL-NEXT: Entry: 0x0 +# ALL-NEXT: ProgramHeaderOffset: 0x0 +# ALL-NEXT: SectionHeaderOffset: 0x58 +# KNOWN-ABI-VERSION-NEXT: Flags [ ([[FLAG_VALUE]]) +# SINGLE-FLAG-NEXT: [[FLAG_NAME]] ([[FLAG_VALUE]]) +# DOUBLE-FLAG-NEXT: [[FLAG_0]] +# DOUBLE-FLAG-NEXT: [[FLAG_1]] +# KNOWN-ABI-VERSION-NEXT: ] +# UNKNOWN-ABI-VERSION-NEXT: Flags: [[FLAG_VALUE]] +# ALL-NEXT: HeaderSize: 64 +# ALL-NEXT: ProgramHeaderEntrySize: 0 +# ALL-NEXT: ProgramHeaderCount: 0 +# ALL-NEXT: SectionHeaderEntrySize: 64 +# ALL-NEXT: SectionHeaderCount: 3 +# ALL-NEXT: StringTableSectionIndex: 2 +# ALL-NEXT: } Index: llvm/test/tools/llvm-readobj/ELF/note-amd.s =================================================================== --- llvm/test/tools/llvm-readobj/ELF/note-amd.s +++ llvm/test/tools/llvm-readobj/ELF/note-amd.s @@ -6,25 +6,27 @@ // GNU: Displaying notes found in: .note.no.desc // GNU-NEXT: Owner Data size Description -// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) -// GNU-NEXT: HSA Metadata: +// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_METADATA (AMD HSA Metadata) +// GNU-NEXT: AMD HSA Metadata: // GNU-NEXT: {{^ $}} -// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_ISA (ISA Version) -// 
GNU-NEXT: ISA Version: +// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +// GNU-NEXT: AMD HSA ISA Name: // GNU-NEXT: {{^ $}} // GNU-EMPTY: // GNU-NEXT: Displaying notes found in: .note.desc // GNU-NEXT: Owner Data size Description -// GNU-NEXT: AMD 0x0000000a NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) -// GNU-NEXT: HSA Metadata: +// GNU-NEXT: AMD 0x0000000a NT_AMD_HSA_METADATA (AMD HSA Metadata) +// GNU-NEXT: AMD HSA Metadata: // GNU-NEXT: meta_blah -// GNU-NEXT: AMD 0x00000009 NT_AMD_AMDGPU_ISA (ISA Version) -// GNU-NEXT: ISA Version: +// GNU-NEXT: AMD 0x00000009 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +// GNU-NEXT: AMD HSA ISA Name: // GNU-NEXT: isa_blah // GNU-EMPTY: // GNU-NEXT: Displaying notes found in: .note.other // GNU-NEXT: Owner Data size Description -// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +// GNU-NEXT: AMD 0x00000000 NT_AMD_PAL_METADATA (AMD PAL Metadata) +// GNU-NEXT: AMD PAL Metadata: +// GNU-NEXT: {{^ $}} // GNU-EMPTY: // GNU-NEXT: Displaying notes found in: .note.unknown // GNU-NEXT: Owner Data size Description @@ -40,14 +42,14 @@ // LLVM-NEXT: Note { // LLVM-NEXT: Owner: AMD // LLVM-NEXT: Data size: 0x0 -// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) -// LLVM-NEXT: HSA Metadata: +// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata) +// LLVM-NEXT: AMD HSA Metadata: // LLVM-NEXT: } // LLVM-NEXT: Note { // LLVM-NEXT: Owner: AMD // LLVM-NEXT: Data size: 0x0 -// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version) -// LLVM-NEXT: ISA Version: +// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +// LLVM-NEXT: AMD HSA ISA Name: // LLVM-NEXT: } // LLVM-NEXT: } // LLVM-NEXT: NoteSection { @@ -57,14 +59,14 @@ // LLVM-NEXT: Note { // LLVM-NEXT: Owner: AMD // LLVM-NEXT: Data size: 0xA -// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) -// LLVM-NEXT: HSA Metadata: meta_blah +// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata) +// LLVM-NEXT: AMD HSA Metadata: meta_blah // 
LLVM-NEXT: } // LLVM-NEXT: Note { // LLVM-NEXT: Owner: AMD // LLVM-NEXT: Data size: 0x9 -// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version) -// LLVM-NEXT: ISA Version: isa_blah +// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) +// LLVM-NEXT: AMD HSA ISA Name: isa_blah // LLVM-NEXT: } // LLVM-NEXT: } // LLVM-NEXT: NoteSection { @@ -74,7 +76,8 @@ // LLVM-NEXT: Note { // LLVM-NEXT: Owner: AMD // LLVM-NEXT: Data size: 0x0 -// LLVM-NEXT: Type: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +// LLVM-NEXT: Type: NT_AMD_PAL_METADATA (AMD PAL Metadata) +// LLVM-NEXT: AMD PAL Metadata: // LLVM-NEXT: } // LLVM-NEXT: } // LLVM-NEXT: NoteSection { @@ -96,17 +99,17 @@ .align 4 .long 4 /* namesz */ .long 0 /* descsz */ - .long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */ + .long 10 /* type = NT_AMD_HSA_METADATA */ .asciz "AMD" .long 4 /* namesz */ .long 0 /* descsz */ - .long 11 /* type = NT_AMD_AMDGPU_ISA */ + .long 11 /* type = NT_AMD_HSA_ISA_NAME */ .asciz "AMD" .section ".note.desc", "a" .align 4 .long 4 /* namesz */ .long end.meta - begin.meta /* descsz */ - .long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */ + .long 10 /* type = NT_AMD_HSA_METADATA */ .asciz "AMD" begin.meta: .asciz "meta_blah" @@ -114,7 +117,7 @@ .align 4 .long 4 /* namesz */ .long end.isa - begin.isa /* descsz */ - .long 11 /* type = NT_AMD_AMDGPU_ISA */ + .long 11 /* type = NT_AMD_HSA_ISA_NAME */ .asciz "AMD" begin.isa: .asciz "isa_blah" @@ -124,7 +127,7 @@ .align 4 .long 4 /* namesz */ .long 0 /* descsz */ - .long 12 /* type = NT_AMD_AMDGPU_PAL_METADATA */ + .long 12 /* type = NT_AMD_PAL_METADATA */ .asciz "AMD" .section ".note.unknown", "a" .align 4 Index: llvm/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/tools/llvm-readobj/ELFDumper.cpp +++ llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1428,7 +1428,7 @@ ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6") }; -static const EnumEntry ElfHeaderAMDGPUFlags[] = { +static const EnumEntry 
ElfHeaderAMDGPUFlagsABIVersion3[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630), @@ -1475,8 +1475,63 @@ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032), LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033), - LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK), - LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC) + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3) +}; + +static const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701), + LLVM_READOBJ_ENUM_ENT(ELF, 
EF_AMDGPU_MACH_AMDGCN_GFX702), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4), + LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4) }; static const EnumEntry ElfHeaderRISCVFlags[] = { @@ -4966,15 +5021,95 @@ switch (NoteType) { default: return {"", ""}; - case ELF::NT_AMD_AMDGPU_HSA_METADATA: + case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: 
{ + struct CodeObjectVersion { + uint32_t MajorVersion; + uint32_t MinorVersion; + }; + if (Desc.size() != sizeof(CodeObjectVersion)) + return {"AMD HSA Code Object Version", + "Invalid AMD HSA Code Object Version"}; + std::string VersionString; + raw_string_ostream StrOS(VersionString); + auto Version = reinterpret_cast(Desc.data()); + StrOS << "[Major: " << Version->MajorVersion + << ", Minor: " << Version->MinorVersion << "]"; + return {"AMD HSA Code Object Version", VersionString}; + } + case ELF::NT_AMD_HSA_HSAIL: { + struct HSAILProperties { + uint32_t HSAILMajorVersion; + uint32_t HSAILMinorVersion; + uint8_t Profile; + uint8_t MachineModel; + uint8_t DefaultFloatRound; + }; + if (Desc.size() != sizeof(HSAILProperties)) + return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"}; + auto Properties = reinterpret_cast(Desc.data()); + std::string HSAILPropetiesString; + raw_string_ostream StrOS(HSAILPropetiesString); + StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion + << ", HSAIL Minor: " << Properties->HSAILMinorVersion + << ", Profile: " << Properties->Profile + << ", Machine Model: " << Properties->MachineModel + << ", Default Float Round: " << Properties->DefaultFloatRound << "]"; + return {"AMD HSA HSAIL Properties", HSAILPropetiesString}; + } + case ELF::NT_AMD_HSA_ISA_VERSION: { + struct IsaVersion { + uint16_t VendorNameSize; + uint16_t ArchitectureNameSize; + uint32_t Major; + uint32_t Minor; + uint32_t Stepping; + }; + if (Desc.size() < sizeof(IsaVersion)) + return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"}; + auto Isa = reinterpret_cast(Desc.data()); + if (Desc.size() < sizeof(IsaVersion) + + Isa->VendorNameSize + Isa->ArchitectureNameSize || + Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0) + return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"}; + std::string IsaString; + raw_string_ostream StrOS(IsaString); + StrOS << "[Vendor: " + << StringRef((const char*)Desc.data() + sizeof(IsaVersion), 
Isa->VendorNameSize - 1) + << ", Architecture: " + << StringRef((const char*)Desc.data() + sizeof(IsaVersion) + Isa->VendorNameSize, + Isa->ArchitectureNameSize - 1) + << ", Major: " << Isa->Major << ", Minor: " << Isa->Minor + << ", Stepping: " << Isa->Stepping << "]"; + return {"AMD HSA ISA Version", IsaString}; + } + case ELF::NT_AMD_HSA_METADATA: { + if (Desc.size() == 0) + return {"AMD HSA Metadata", ""}; return { - "HSA Metadata", - std::string(reinterpret_cast(Desc.data()), Desc.size())}; - case ELF::NT_AMD_AMDGPU_ISA: + "AMD HSA Metadata", + std::string(reinterpret_cast(Desc.data()), Desc.size() - 1)}; + } + case ELF::NT_AMD_HSA_ISA_NAME: { + if (Desc.size() == 0) + return {"AMD HSA ISA Name", ""}; return { - "ISA Version", + "AMD HSA ISA Name", std::string(reinterpret_cast(Desc.data()), Desc.size())}; } + case ELF::NT_AMD_PAL_METADATA: { + struct PALMetadata { + uint32_t Key; + uint32_t Value; + }; + auto Isa = reinterpret_cast(Desc.data()); + std::string MetadataString; + raw_string_ostream StrOS(MetadataString); + for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++E) { + StrOS << "[" << Isa[I].Key << ": " << Isa[I].Value << "]"; + } + return {"AMD PAL Metadata", MetadataString}; + } + } } struct AMDGPUNote { @@ -4995,11 +5130,11 @@ return {"", ""}; AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); - std::string HSAMetadataString; + std::string MetadataString; if (!Verifier.verify(MsgPackDoc.getRoot())) - HSAMetadataString = "Invalid AMDGPU Metadata\n"; + MetadataString = "Invalid AMDGPU Metadata\n"; - raw_string_ostream StrOS(HSAMetadataString); + raw_string_ostream StrOS(MetadataString); if (MsgPackDoc.getRoot().isScalar()) { // TODO: passing a scalar root to toYAML() asserts: // (PolymorphicTraits::getKind(Val) != NodeKind::Scalar && @@ -5128,11 +5263,13 @@ }; static const NoteType AMDNoteTypes[] = { - {ELF::NT_AMD_AMDGPU_HSA_METADATA, - "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"}, - {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA 
Version)"}, - {ELF::NT_AMD_AMDGPU_PAL_METADATA, - "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}, + {ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, + "NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"}, + {ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"}, + {ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"}, + {ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"}, + {ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"}, + {ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"}, }; static const NoteType AMDGPUNoteTypes[] = { @@ -6072,10 +6209,28 @@ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags), unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); - else if (E.e_machine == EM_AMDGPU) - W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags), - unsigned(ELF::EF_AMDGPU_MACH)); - else if (E.e_machine == EM_RISCV) + else if (E.e_machine == EM_AMDGPU) { + switch (E.e_ident[ELF::EI_ABIVERSION]) { + default: + W.printHex("Flags", E.e_flags); + break; + case 0: + // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. + LLVM_FALLTHROUGH; + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + W.printFlags("Flags", E.e_flags, + makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion3), + unsigned(ELF::EF_AMDGPU_MACH)); + break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + W.printFlags("Flags", E.e_flags, + makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), + unsigned(ELF::EF_AMDGPU_MACH), + unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), + unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); + break; + } + } else if (E.e_machine == EM_RISCV) W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags)); else W.printFlags("Flags", E.e_flags);