Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -39,6 +39,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { private: + unsigned CodeObjectVersion; void initializeTargetID(const Module &M); AMDGPUResourceUsageAnalysis *ResourceUsage; @@ -90,6 +91,7 @@ AMDGPUTargetStreamer* getTargetStreamer() const; + bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; bool runOnMachineFunction(MachineFunction &MF) override; Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -89,18 +89,6 @@ std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) { assert(OutStreamer && "AsmPrinter constructed without streamer"); - - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { - if (isHsaAbiVersion2(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); - } else if (isHsaAbiVersion3(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); - } else if (isHsaAbiVersion5(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5()); - } else { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); - } - } } StringRef AMDGPUAsmPrinter::getPassName() const { @@ -133,7 +121,7 @@ TM.getTargetTriple().getOS() != Triple::AMDPAL) return; - if (isHsaAbiVersion3AndAbove(getGlobalSTI())) + if (CodeObjectVersion >= 3) getTargetStreamer()->EmitDirectiveAMDGCNTarget(); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) @@ -142,7 +130,7 @@ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - if (isHsaAbiVersion3AndAbove(getGlobalSTI())) + if (CodeObjectVersion >= 3) return; // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. @@ -160,8 +148,7 @@ if (!IsTargetStreamerInitialized) initTargetStreamer(M); - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - isHsaAbiVersion2(getGlobalSTI())) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA || CodeObjectVersion == 2) getTargetStreamer()->EmitISAVersion(); // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). @@ -221,7 +208,7 @@ if (!MFI.isEntryFunction()) return; - if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) && + if ((STM.isMesaKernel(F) || CodeObjectVersion == 2) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { amd_kernel_code_t KernelCode; @@ -238,8 +225,7 @@ if (!MFI.isEntryFunction()) return; - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - isHsaAbiVersion2(getGlobalSTI())) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA || CodeObjectVersion == 2) return; auto &Streamer = getTargetStreamer()->getStreamer(); @@ -266,14 +252,15 @@ IsaInfo::getNumExtraSGPRs(&STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), - CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, + CodeObjectVersion); Streamer.popSection(); } void AMDGPUAsmPrinter::emitFunctionEntryLabel() { if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - isHsaAbiVersion3AndAbove(getGlobalSTI())) { + CodeObjectVersion >=3) { AsmPrinter::emitFunctionEntryLabel(); return; } @@ -343,6 +330,30 @@ AsmPrinter::emitGlobalVariable(GV); } +bool AMDGPUAsmPrinter::doInitialization(Module &M) { + CodeObjectVersion = AMDGPU::getCodeObjectVersion(M); + + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { + switch (CodeObjectVersion) { + case 2: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); + break; + case 3: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); + break; + case 4: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); + break; + case 5: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5()); + break; + default: + report_fatal_error("Unexpected code object version"); + } + } + return AsmPrinter::doInitialization(M); +} + bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Pad with s_code_end to help tools and guard against instruction prefetch // causing stale data in caches. Arguably this should be done by the linker, @@ -389,7 +400,7 @@ KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; } - if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + if (MFI.hasQueuePtr() && CodeObjectVersion < 5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; } @@ -410,10 +421,8 @@ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } - if (CurrentProgramInfo.DynamicCallStack && - AMDGPU::getAmdhsaCodeObjectVersion() >= 5) { + if (CurrentProgramInfo.DynamicCallStack && CodeObjectVersion >= 5) KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; - } return KernelCodeProperties; } @@ -1109,7 +1118,7 @@ if (MFI->hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) + if (MFI->hasQueuePtr() && CodeObjectVersion < 5) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; if (MFI->hasKernargSegmentPtr()) Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -56,8 +56,8 @@ // size is 1 for y/z. static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, - bool HasApertureRegs, bool SupportsGetDoorBellID) { - unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion(); + bool HasApertureRegs, bool SupportsGetDoorBellID, + unsigned CodeObjectVersion) { switch (ID) { case Intrinsic::amdgcn_workitem_id_x: NonKernelOnly = true; @@ -88,7 +88,7 @@ // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access // queue_ptr. case Intrinsic::amdgcn_queue_ptr: - NeedsImplicit = (CodeObjectVersion == 5); + NeedsImplicit = (CodeObjectVersion >= 5); return QUEUE_PTR; case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: @@ -97,11 +97,11 @@ // Under V5, we need implicitarg_ptr + offsets to access private_base or // shared_base. For pre-V5, however, need to access them through queue_ptr + // offsets. - return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR; + return CodeObjectVersion >= 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR; case Intrinsic::trap: if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4. return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR; - NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5. + NeedsImplicit = (CodeObjectVersion >= 5); // Need impicitarg_ptr under V5. return QUEUE_PTR; default: return NOT_IMPLICIT_INPUT; @@ -137,7 +137,9 @@ AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, BumpPtrAllocator &Allocator, SetVector *CGSCC, TargetMachine &TM) - : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} + : InformationCache(M, AG, Allocator, CGSCC), TM(TM), + CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {} + TargetMachine &TM; enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; @@ -165,6 +167,11 @@ return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; } + /// Get code object version. + unsigned getCodeObjectVersion() const { + return CodeObjectVersion; + } + private: /// Check if the ConstantExpr \p CE requires the queue pointer. static bool visitConstExpr(const ConstantExpr *CE) { @@ -221,6 +228,7 @@ private: /// Used to determine if the Constant needs the queue pointer. DenseMap ConstantStatus; + const unsigned CodeObjectVersion; }; struct AAAMDAttributes @@ -411,6 +419,7 @@ auto &InfoCache = static_cast(A.getInfoCache()); bool HasApertureRegs = InfoCache.hasApertureRegs(*F); bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F); + unsigned COV = InfoCache.getCodeObjectVersion(); for (Function *Callee : AAEdges.getOptimisticEdges()) { Intrinsic::ID IID = Callee->getIntrinsicID(); @@ -424,7 +433,7 @@ bool NonKernelOnly = false; ImplicitArgumentMask AttrMask = intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, - HasApertureRegs, SupportsGetDoorbellID); + HasApertureRegs, SupportsGetDoorbellID, COV); if (AttrMask != NOT_IMPLICIT_INPUT) { if ((IsNonEntryFunc || !NonKernelOnly)) removeAssumedBits(AttrMask); @@ -438,29 +447,29 @@ if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) { // Under V5, we need implicitarg_ptr + offsets to access private_base or // shared_base. We do not actually need queue_ptr. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) + if (COV >= 5) removeAssumedBits(IMPLICIT_ARG_PTR); else removeAssumedBits(QUEUE_PTR); } - if (funcRetrievesMultigridSyncArg(A)) { + if (funcRetrievesMultigridSyncArg(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "multigrid_sync_arg needs implicitarg_ptr"); removeAssumedBits(MULTIGRID_SYNC_ARG); } - if (funcRetrievesHostcallPtr(A)) { + if (funcRetrievesHostcallPtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr"); removeAssumedBits(HOSTCALL_PTR); } - if (funcRetrievesHeapPtr(A)) { + if (funcRetrievesHeapPtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr"); removeAssumedBits(HEAP_PTR); } - if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) { + if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr"); removeAssumedBits(QUEUE_PTR); } @@ -469,10 +478,10 @@ removeAssumedBits(LDS_KERNEL_ID); } - if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A)) + if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV)) removeAssumedBits(DEFAULT_QUEUE); - if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A)) + if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV)) removeAssumedBits(COMPLETION_ACTION); return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED @@ -557,39 +566,39 @@ return false; } - bool funcRetrievesMultigridSyncArg(Attributor &A) { - auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(); + bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV); AA::RangeTy Range(Pos, 8); return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesHostcallPtr(Attributor &A) { - auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); + bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV); AA::RangeTy Range(Pos, 8); return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesDefaultQueue(Attributor &A) { - auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(); + bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV); AA::RangeTy Range(Pos, 8); return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesCompletionAction(Attributor &A) { - auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(); + bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV); AA::RangeTy Range(Pos, 8); return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesHeapPtr(Attributor &A) { - if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) { + if (COV < 5) return false; AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesQueuePtr(Attributor &A) { - if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) { + if (COV < 5) return false; AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); return funcRetrievesImplicitKernelArg(A, Range); Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -466,7 +466,8 @@ CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < 5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -84,7 +84,8 @@ msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const; msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const; + const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const; void emitVersion() override; Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -876,7 +876,8 @@ } msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { + const MachineFunction &MF, const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); const Function &F = MF.getFunction(); @@ -890,10 +891,11 @@ Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (CodeObjectVersion >= 5) Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP()) + + if (CodeObjectVersion >= 5 && STM.supportsWGP()) Kern[".workgroup_processor_mode"] = Kern.getDocument()->getNode(ProgramInfo.WgpMode); @@ -945,7 +947,8 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); - auto Kern = getHSAKernelProps(MF, ProgramInfo); + auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent()); + auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || Func.getCallingConv() == CallingConv::SPIR_KERNEL); Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1856,7 +1856,7 @@ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); // For code object version 5, private_base and shared_base are passed through // implicit kernargs. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= 5) { AMDGPUTargetLowering::ImplicitParameter Param = AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE : AMDGPUTargetLowering::PRIVATE_BASE; @@ -5283,20 +5283,13 @@ ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return legalizeTrapEndpgm(MI, MRI, B); - if (std::optional HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) { - switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - return legalizeTrapHsaQueuePtr(MI, MRI, B); - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - return ST.supportsGetDoorbellID() ? - legalizeTrapHsa(MI, MRI, B) : - legalizeTrapHsaQueuePtr(MI, MRI, B); - } - } + const Module *M = B.getMF().getFunction().getParent(); + unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); + if (CodeObjectVersion <=3) + return legalizeTrapHsaQueuePtr(MI, MRI, B); - llvm_unreachable("Unknown trap handler"); + return ST.supportsGetDoorbellID() ? + legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B); } bool AMDGPULegalizerInfo::legalizeTrapEndpgm( @@ -5313,7 +5306,7 @@ Register SGPR01(AMDGPU::SGPR0_SGPR1); // For code object version 5, queue_ptr is passed through implicit kernarg. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= 5) { AMDGPUTargetLowering::ImplicitParameter Param = AMDGPUTargetLowering::QUEUE_PTR; uint64_t Offset = Index: llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -322,7 +322,7 @@ // TargetPassConfig for subtarget. bool AMDGPULowerKernelAttributes::runOnModule(Module &M) { bool MadeChange = false; - bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5; + bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= 5; Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. @@ -354,7 +354,7 @@ PreservedAnalyses AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) { - bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5; + bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(*F.getParent()) >= 5; Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -111,7 +111,7 @@ // By default, for code object v5 and later, track only the minimum scratch // size - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) { + if (AMDGPU::getCodeObjectVersion(M) >= 5) { if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences()) AssumedStackSizeForDynamicSizeObjects = 0; if (!AssumedStackSizeForExternalCall.getNumOccurrences()) Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -543,7 +543,8 @@ return 16; // Assume all implicit inputs are used by default - unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56; + const Module *M = F.getParent(); + unsigned NBytes = AMDGPU::getCodeObjectVersion(*M) >= 5 ? 256 : 56; return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes", NBytes); } Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5309,7 +5309,8 @@ getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, - ReserveFlatScr); + // TOTO: get code object vesion from a directive??? + ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); return false; } Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -93,7 +93,8 @@ virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr){}; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion){}; static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); @@ -153,7 +154,8 @@ void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -213,7 +215,8 @@ void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) override; }; } Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -320,7 +320,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, - bool ReserveVCC, bool ReserveFlatScr) { + bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -367,7 +367,7 @@ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (CodeObjectVersion >= 5) PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); PRINT_FIELD(OS, @@ -407,19 +407,17 @@ if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI)) OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; - if (std::optional HsaAbiVer = getHsaAbiVersion(&STI)) { - switch (*HsaAbiVer) { - default: - break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - if (getTargetID()->isXnackSupported()) - OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; - break; - } + switch (CodeObjectVersion) { + default: + break; + case 2: + break; + case 3: + case 4: + case 5: + if (getTargetID()->isXnackSupported()) + OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; + break; } PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, @@ -850,7 +848,8 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2084,7 +2084,8 @@ if (Info.hasDispatchPtr()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < 5) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a @@ -2134,7 +2135,8 @@ CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && AMDGPU::getCodeObjectVersion(*M) < 5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); @@ -5433,19 +5435,13 @@ Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return lowerTrapEndpgm(Op, DAG); - if (std::optional HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) { - switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - return lowerTrapHsaQueuePtr(Op, DAG); - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - return Subtarget->supportsGetDoorbellID() ? - lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); - } - } + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); + if (CodeObjectVersion <= 3) + return lowerTrapHsaQueuePtr(Op, DAG); - llvm_unreachable("Unknown trap handler"); + return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) : + lowerTrapHsaQueuePtr(Op, DAG); } SDValue SITargetLowering::lowerTrapEndpgm( @@ -5473,7 +5469,8 @@ SDValue QueuePtr; // For code object version 5, QueuePtr is passed through implicit kernarg. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + if (AMDGPU::getCodeObjectVersion(*M) >= 5) { QueuePtr = loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR); } else { @@ -5576,7 +5573,8 @@ // For code object version 5, private_base and shared_base are passed through // implicit kernargs. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + if (AMDGPU::getCodeObjectVersion(*M) >= 5) { ImplicitParameter Param = (AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE; return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param); Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -12,6 +12,7 @@ #include "SIDefines.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Alignment.h" #include #include @@ -61,17 +62,20 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr -unsigned getMultigridSyncArgImplicitArgPosition(); +unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); /// \returns The offset of the hostcall pointer argument from implicitarg_ptr -unsigned getHostcallImplicitArgPosition(); +unsigned getHostcallImplicitArgPosition(unsigned COV); -unsigned getDefaultQueueImplicitArgPosition(); -unsigned getCompletionActionImplicitArgPosition(); +unsigned getDefaultQueueImplicitArgPosition(unsigned COV); +unsigned getCompletionActionImplicitArgPosition(unsigned COV); /// \returns Code object version. unsigned getAmdhsaCodeObjectVersion(); +/// \returns Code object version. +unsigned getCodeObjectVersion(const Module &M); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -150,8 +150,18 @@ return AmdhsaCodeObjectVersion; } -unsigned getMultigridSyncArgImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { +unsigned getCodeObjectVersion(const Module &M) { + if (auto Ver = mdconst::extract_or_null( + M.getModuleFlag("amdgpu_code_object_version"))) { + return (unsigned)Ver->getZExtValue() / 100; + } + + // Default code object version. + return 4; +} + +unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV) { + switch (COV) { case 2: case 3: case 4: @@ -167,8 +177,8 @@ // FIXME: All such magic numbers about the ABI should be in a // central TD file. -unsigned getHostcallImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { +unsigned getHostcallImplicitArgPosition(unsigned COV) { + switch (COV) { case 2: case 3: case 4: @@ -181,8 +191,8 @@ } } -unsigned getDefaultQueueImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { +unsigned getDefaultQueueImplicitArgPosition(unsigned COV) { + switch (COV) { case 2: case 3: case 4: @@ -193,8 +203,8 @@ } } -unsigned getCompletionActionImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { +unsigned getCompletionActionImplicitArgPosition(unsigned COV) { + switch (COV) { case 2: case 3: case 4: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V3-LABEL: addrspacecast: @@ -528,3 +528,6 @@ declare i1 @llvm.amdgcn.is.private(ptr) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.dispatch.id() #1 @@ -17,3 +17,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Error on non-HSA target @@ -15,3 +15,6 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s @@ -122,3 +122,6 @@ attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" } attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Error on non-hsa target @@ -15,3 +15,6 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,9 @@ -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -104,3 +104,6 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,11 @@ -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -198,3 +198,6 @@ !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} + +!llvm.module.flags = !{!99} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s -; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s ; Test with gfx803 so that ; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require -; the queue ptr. Tests with code object v3 to test +; the queue ptr. Tests with code object v3 and above to test ; llvm.trap/llvm.debugtrap that require the queue ptr. Index: llvm/test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -414,3 +414,6 @@ attributes #1 = { nounwind convergent } attributes #2 = { nounwind readnone } attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,16 +1,16 @@ -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s -; RUN: llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC ; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4 ; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] poison, align 16 @@ -51,14 +51,14 @@ ; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() ; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, ptr addrspace(4) [[DISPATCH_PTR]], i64 1 -; HSAOPT: [[LDXY:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP0]], align 4, !invariant.load !0 +; HSAOPT: [[LDXY:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP0]], align 4, !invariant.load !1 ; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, ptr addrspace(4) [[DISPATCH_PTR]], i64 2 -; HSAOPT: [[LDZU:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]], align 4, !range !1, !invariant.load !0 +; HSAOPT: [[LDZU:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]], align 4, !range !2, !invariant.load !1 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 -; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2 -; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !2 -; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !2 +; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !3 +; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !3 +; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !3 ; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]] ; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]] @@ -72,11 +72,11 @@ ; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 1 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !1 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1 +; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !1 +; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !1 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !2 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !2 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !2 define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -533,9 +533,12 @@ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" } attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" } -; HSAOPT: !0 = !{} -; HSAOPT: !1 = !{i32 0, i32 257} -; HSAOPT: !2 = !{i32 0, i32 256} +!llvm.module.flags = !{!99} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} -; NOHSAOPT: !0 = !{i32 0, i32 257} -; NOHSAOPT: !1 = !{i32 0, i32 256} +; HSAOPT: !1 = !{} +; HSAOPT: !2 = !{i32 0, i32 257} +; HSAOPT: !3 = !{i32 0, i32 256} + +; NOHSAOPT: !1 = !{i32 0, i32 257} +; NOHSAOPT: !2 = !{i32 0, i32 256} Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 @@ -129,6 +129,9 @@ } attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + ; CHECK: amdhsa.kernels: ; CHECK: .max_flat_workgroup_size: 64 ; CHECK: .name: min_64_max_64 Index: llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 @@ -129,6 +129,9 @@ } attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + ; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; HSAMD: Version: [ 1, 0 ] ; HSAMD: Kernels: Index: llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-V5 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. @@ -285,3 +285,6 @@ attributes #0 = { nounwind noinline norecurse } attributes #1 = { nounwind noinline norecurse } attributes #2 = { nounwind noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt @@ -802,3 +802,6 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind noinline } attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn --amdhsa-code-object-version=2 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn --amdhsa-code-object-version=2 -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s ; Verify registers used for tracking exec mask changes when all ; registers are spilled at the end of the block. The SGPR spill @@ -270,3 +270,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/elf-notes.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/elf-notes.ll +++ llvm/test/CodeGen/AMDGPU/elf-notes.ll @@ -1,12 +1,12 @@ -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-UNK %s -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-UNK %s -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-UNK-ELF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-HSA-ELF %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-PAL-ELF %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 < %s | FileCheck --check-prefix=OSABI-UNK %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-UNK %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-UNK-ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 < %s| FileCheck --check-prefix=OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-HSA-ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 < %s | FileCheck --check-prefix=OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-PAL-ELF %s ; RUN: llc -march=r600 < %s | FileCheck --check-prefix=R600 %s ; OSABI-UNK-NOT: .hsa_code_object_version @@ -95,3 +95,6 @@ define amdgpu_kernel void @elf_notes() { ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll +++ llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=GCN,COV5 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck -check-prefixes=GCN,COV4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 @@ -18,3 +18,6 @@ call void %fptr() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll +++ llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s ; There are no stack objects even though flat is used by default, so @@ -51,3 +51,6 @@ ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,26 +1,26 @@ -; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s ; GCN-LABEL: {{^}}no_vcc_no_flat: @@ -166,3 +166,6 @@ } attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s ; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { @@ -6,3 +6,6 @@ ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + Index: llvm/test/CodeGen/AMDGPU/hsa-default-device.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-default-device.ll +++ llvm/test/CodeGen/AMDGPU/hsa-default-device.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s ; Make sure that with an HSA triple, we don't default to an ; unsupported device. @@ -9,3 +9,5 @@ ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: ; GCN: float_mode = 240 @@ -99,3 +99,6 @@ attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" } attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" } attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | FileCheck --check-prefix=HSA-CI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. @@ -67,3 +67,6 @@ store i32 0, ptr addrspace(1) %out ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; CHECK: - Name: test_ro_arg ; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' @@ -30,3 +30,6 @@ !1 = !{!"none", !"none"} !2 = !{!"float*", !"float*"} !3 = !{!"const restrict", !""} + +!llvm.module.flags = !{!99} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -150,6 +150,9 @@ attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] @@ -78,6 +78,9 @@ attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }] @@ -37,3 +37,6 @@ ; CHECK: .name: amdgcn.device.fini ; PARSER: AMDGPU HSA Metadata Parser Test: PASS + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -1745,6 +1745,9 @@ attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -1870,6 +1870,9 @@ attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -292,3 +292,6 @@ attributes #0 = { "amdgpu-no-heap-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -296,3 +296,6 @@ attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll @@ -1,10 +1,10 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s ; CHECK: amdhsa.kernels: @@ -106,8 +106,9 @@ ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} !llvm.printf.fmts = !{!1, !2} - !1 = !{!"1:1:4:%d\5Cn"} !2 = !{!"2:1:8:%g\5Cn"} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] @@ -308,3 +308,6 @@ attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -39,6 +39,8 @@ attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -294,3 +294,6 @@ attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } attributes #4 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -292,3 +292,6 @@ attributes #0 = { "amdgpu-no-hostcall-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque @@ -98,6 +98,9 @@ ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque @@ -86,6 +86,8 @@ ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,3 +9,5 @@ ; CHECK: ... !opencl.ocl.version = !{} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,3 +7,5 @@ ; CHECK: ... !opencl.ocl.version = !{} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,4 +9,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,4 +7,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,4 +9,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{i32 1} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,4 +7,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{i32 1} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX1010,WAVE32 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX1010,WAVE32 %s @var = addrspace(1) global float 0.0 @@ -163,3 +163,6 @@ attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s @var = addrspace(1) global float 0.0 @@ -168,3 +168,6 @@ attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -292,3 +292,6 @@ attributes #0 = { "amdgpu-no-multigrid-sync-arg" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll @@ -1,10 +1,10 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s ; On gfx8, the queue ptr is required for this addrspacecast. @@ -76,3 +76,6 @@ declare i1 @llvm.amdgcn.is.private(ptr) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -292,3 +292,6 @@ attributes #0 = { "amdgpu-no-queue-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll @@ -1,10 +1,10 @@ ; Note: uses a randomly selected assumed external call stack size so that the ; test assertions are unlikely to succeed by accident. -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s ; CHECK-LABEL: amdhsa.kernels @@ -135,3 +135,6 @@ } attributes #0 = { norecurse } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 < %s | FileCheck %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -28,3 +28,6 @@ } attributes #0 = { "uniform-work-group-size"="true" } attributes #1 = { "uniform-work-group-size"="false" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll +++ llvm/test/CodeGen/AMDGPU/hsa-metadata-workgroup-processor-mode-v5.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-CU %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10-CU %s ; GFX10: .amdhsa_workgroup_processor_mode 0 ; GFX10: .workgroup_processor_mode: 0 @@ -12,3 +12,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,38 +1,38 @@ -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 | FileCheck --check-prefixes=NONHSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 | FileCheck --check-prefixes=NONHSA-SI601 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 | FileCheck --check-prefixes=NONHSA-SI602 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefixes=HSA,HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefixes=HSA,HSA-CI705 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s ; HSA: .hsa_code_object_version 2,1 ; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600" @@ -57,3 +57,6 @@ ; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU" ; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU" ; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU" + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/hsa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa.ll +++ llvm/test/CodeGen/AMDGPU/hsa.ll @@ -1,13 +1,14 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-flat-for-global --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA-CI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA-VI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 --amdgpu-lower-module-lds-strategy=module | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 --amdgpu-lower-module-lds-strategy=module | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s + +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA-CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=HSA-VI %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdgpu-lower-module-lds-strategy=module | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdgpu-lower-module-lds-strategy=module | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 --amdgpu-lower-module-lds-strategy=module | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. @@ -84,3 +85,6 @@ store volatile i32 0, ptr addrspace(1) undef ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll +++ llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 { Index: llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll +++ llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V3-LABEL: addrspacecast: @@ -515,3 +515,6 @@ declare i1 @llvm.amdgcn.is.private(ptr) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll +++ llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=CHECK %s ; CHECK-LABEL: test_unaligned_to_eight: @@ -56,3 +56,6 @@ ; CHECK-LABEL: .name: test_aligned_to_eight declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} Index: llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll +++ llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor --amdhsa-code-object-version=4 < %s | FileCheck -check-prefixes=CHECK,V4 %s -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=CHECK,V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 @@ -224,6 +224,10 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} + + ;. ; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; V4: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } @@ -238,3 +242,7 @@ ; V5: attributes #[[ATTR3]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V5: attributes #[[ATTR4]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. +; V4: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400} +;. +; V5: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500} +;. Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 @@ -1913,3 +1913,6 @@ tail call amdgpu_gfx void %fptr() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/kernarg-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=DOORBELL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=DOORBELL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s declare void @llvm.trap() #0 @@ -27,3 +27,6 @@ store volatile i32 2, ptr addrspace(1) %arg0 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll +++ llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HSA-VI,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HSA-VI,FUNC %s ; Repeat of some problematic tests in kernel-args.ll, with the IR ; argument lowering pass disabled. Struct padding needs to be @@ -274,3 +274,6 @@ store i32 %in, ptr addrspace(1) undef, align 4 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -1,10 +1,10 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s ; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global,-architected-flat-scratch,-user-sgpr-init16-bug < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global,-architected-flat-scratch,-user-sgpr-init16-bug < %s | FileCheck --check-prefixes=GCNHSA,ALL %s ; FIXME: align on alloca seems to be ignored for private_segment_alignment @@ -68,3 +68,6 @@ } attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/lds-alignment.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-alignment.ll +++ llvm/test/CodeGen/AMDGPU/lds-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=HSA %s @lds.align16.0 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16 @lds.align16.1 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16 @@ -227,3 +227,6 @@ attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/lds-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-size.ll +++ llvm/test/CodeGen/AMDGPU/lds-size.ll @@ -1,5 +1,5 @@ -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are @@ -34,3 +34,6 @@ endif: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.dispatch.id() #1 @@ -17,3 +17,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target @@ -31,3 +31,6 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,COV5 %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty: ; HSA: enable_sgpr_kernarg_segment_ptr = 1 @@ -391,3 +391,6 @@ attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s @@ -116,3 +116,6 @@ attributes #1 = { nounwind } attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target @@ -16,3 +16,6 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -104,3 +104,6 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -132,6 +132,9 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind } +!llvm.module.flags = !{!3} + !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} +!3 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll +++ llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s ; CHECK: in function pixel_s{{.*}}: unsupported non-compute shaders with HSA define amdgpu_ps void @pixel_shader() #0 { @@ -14,3 +14,6 @@ define amdgpu_gs void @geometry_shader() #0 { ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s ; FIXME: Generated test checks do not check metadata at the end of the ; function, so this also includes manually added checks. @@ -402,3 +402,6 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/nop-data.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/nop-data.ll +++ llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : ; CHECK: s_endpgm @@ -85,3 +85,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/private-element-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ llvm/test/CodeGen/AMDGPU/private-element-size.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s ; ALL-LABEL: {{^}}private_elt_size_v4i32: @@ -247,3 +247,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll +++ llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s -; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s +; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s ; ALL-LABEL: {{^}}promote_alloca_i32_array_array: ; NOOPTS: workgroup_group_segment_byte_size = 0{{$}} @@ -34,3 +34,6 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" } attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -disable-promote-alloca-to-vector -amdgpu-enable-lower-module-lds=0 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -disable-promote-alloca-to-vector -amdgpu-enable-lower-module-lds=0 < %s | FileCheck -check-prefix=GCN %s ; This shows that the amount LDS size estimate should try to not be ; sensitive to the order of the LDS globals. This should try to @@ -127,3 +127,6 @@ } attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/recursion.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/recursion.ll +++ llvm/test/CodeGen/AMDGPU/recursion.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s ; CHECK-LABEL: {{^}}recursive: ; CHECK: ScratchSize: 16 @@ -76,3 +76,6 @@ call void @tail_recursive_with_stack() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -o - %s | FileCheck -check-prefix=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s ; Make sure there's no assertion when trying to report the resource ; usage for a function which becomes dead during codegen. @@ -34,3 +34,6 @@ bb2: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; Make sure the stack is never realigned for entry functions. @@ -315,3 +315,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind "stackrealign" } attributes #2 = { nounwind alignstack=128 } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} Index: llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll +++ llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll @@ -7,3 +7,6 @@ ; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY ; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940 + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 @@ -28,3 +28,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -27,3 +27,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -30,3 +30,5 @@ } attributes #0 = { "target-features"="-xnack" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -30,3 +30,6 @@ } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -30,3 +30,6 @@ } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -30,3 +30,6 @@ } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -30,3 +30,6 @@ } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -30,3 +30,6 @@ } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll +++ llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck --check-prefixes=ERR %s ; ERR: error: xnack setting of 'func2' function does not match module xnack setting @@ -19,3 +19,6 @@ attributes #0 = { "target-features"="-xnack" } attributes #1 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 @@ -18,3 +18,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -17,3 +17,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -20,3 +20,6 @@ } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll +++ llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -20,3 +20,6 @@ } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: llvm/test/CodeGen/AMDGPU/trap-abis.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 @@ -1167,3 +1167,6 @@ attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/trap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap.ll +++ llvm/test/CodeGen/AMDGPU/trap.ll @@ -1,12 +1,12 @@ -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; enable trap handler feature ; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s @@ -123,3 +123,6 @@ attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} Index: llvm/test/CodeGen/AMDGPU/unsupported-code-object-version.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/unsupported-code-object-version.ll @@ -0,0 +1,8 @@ +; RUN: sed 's/CODE_OBJECT_VERSION/0/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s +; RUN: sed 's/CODE_OBJECT_VERSION/100/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s +; RUN: sed 's/CODE_OBJECT_VERSION/9900/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s + +; HSA-ERROR: Unexpected code object version + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} Index: llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -2,8 +2,8 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register ; scavenging which will fail to find an unsued register.