diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -82,7 +82,7 @@ /// code object version times 100. enum CodeObjectVersionKind { COV_None, - COV_2 = 200, + COV_2 = 200, // Unsupported. COV_3 = 300, COV_4 = 400, COV_5 = 500, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3995,9 +3995,9 @@ def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Flags<[CC1Option]>, - Values<"none,2,3,4,5">, + Values<"none,3,4,5">, NormalizedValuesScope<"TargetOptions">, - NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, + NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>, MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2299,7 +2299,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - const unsigned MinCodeObjVer = 2; + const unsigned MinCodeObjVer = 3; const unsigned MaxCodeObjVer = 5; if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -3,9 +3,6 @@ // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -o - %s | FileCheck %s -check-prefix=V4 -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s - // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s @@ -21,7 +18,6 @@ // RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV -// V2: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 200} // V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300} // V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400} // V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip --- a/clang/test/Driver/hip-code-object-version.hip +++ b/clang/test/Driver/hip-code-object-version.hip @@ -1,15 +1,5 @@ // REQUIRES: amdgpu-registered-target -// Check bundle ID for code object v2. - -// RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -mcode-object-version=2 \ -// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V2 %s - -// V2: "-mllvm" "--amdhsa-code-object-version=2" -// V2: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" - // Check bundle ID for code object v3. // RUN: not %clang -### --target=x86_64-linux-gnu \ @@ -61,9 +51,16 @@ // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -mcode-object-version=1 \ // RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID %s -// INVALID: error: invalid integral value '1' in '-mcode-object-version=1' -// INVALID-NOT: error: invalid integral value +// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_1 %s +// INVALID_1: error: invalid integral value '1' in '-mcode-object-version=1' +// INVALID_1-NOT: error: invalid integral value + +// RUN: not %clang -### --target=x86_64-linux-gnu \ +// RUN: -mcode-object-version=2 \ +// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_2 %s +// INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2' +// INVALID_2-NOT: error: invalid integral value // Check LLVM code object version option --amdhsa-code-object-version // is passed to -cc1 and -cc1as, and -mcode-object-version is passed diff --git a/lld/test/ELF/Inputs/amdgpu-kernel-0.s b/lld/test/ELF/Inputs/amdgpu-kernel-0.s --- a/lld/test/ELF/Inputs/amdgpu-kernel-0.s +++ b/lld/test/ELF/Inputs/amdgpu-kernel-0.s @@ -1,6 +1,5 @@ .text .globl kernel_0 .align 64 -.amdgpu_hsa_kernel kernel_0 kernel_0: s_endpgm diff --git a/lld/test/ELF/Inputs/amdgpu-kernel-1.s b/lld/test/ELF/Inputs/amdgpu-kernel-1.s --- a/lld/test/ELF/Inputs/amdgpu-kernel-1.s +++ b/lld/test/ELF/Inputs/amdgpu-kernel-1.s @@ -1,6 +1,5 @@ .text .globl kernel_1 .align 64 -.amdgpu_hsa_kernel kernel_1 kernel_1: s_endpgm diff --git a/lld/test/ELF/amdgpu-abi-version-err.s b/lld/test/ELF/amdgpu-abi-version-err.s --- a/lld/test/ELF/amdgpu-abi-version-err.s +++ b/lld/test/ELF/amdgpu-abi-version-err.s @@ -1,6 +1,6 @@ # REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s -o %t-0.o -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj %s -o %t-1.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t-0.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj %s -o %t-1.o # RUN: not ld.lld -shared %t-0.o %t-1.o -o /dev/null 2>&1 | FileCheck %s # CHECK: ld.lld: error: incompatible ABI version: {{.*}}-1.o diff --git a/lld/test/ELF/amdgpu-elf-flags-err.s b/lld/test/ELF/amdgpu-elf-flags-err.s --- a/lld/test/ELF/amdgpu-elf-flags-err.s +++ b/lld/test/ELF/amdgpu-elf-flags-err.s @@ -1,6 +1,6 @@ # REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o # RUN: not ld.lld -shared %t-0.o %t-1.o -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: incompatible e_flags: {{.*}}-1.o +# CHECK: error: incompatible mach: {{.*}}-1.o diff --git a/lld/test/ELF/amdgpu-elf-flags.s b/lld/test/ELF/amdgpu-elf-flags.s --- a/lld/test/ELF/amdgpu-elf-flags.s +++ b/lld/test/ELF/amdgpu-elf-flags.s @@ -1,6 +1,6 @@ # REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o # RUN: ld.lld -shared %t-0.o %t-1.o -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=FIRSTLINK %s diff --git a/lld/test/ELF/amdgpu-kernels.s b/lld/test/ELF/amdgpu-kernels.s deleted file mode 100644 --- a/lld/test/ELF/amdgpu-kernels.s +++ /dev/null @@ -1,56 +0,0 @@ -# REQUIRES: amdgpu -# RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 %s -o %t.o -# RUN: ld.lld -shared %t.o -o %t -# RUN: llvm-readobj --sections --symbols -l %t | FileCheck %s - -.hsa_code_object_version 1,0 -.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" - -.text -.globl kernel0 -.align 256 -.amdgpu_hsa_kernel kernel0 -kernel0: - s_endpgm -.Lfunc_end0: - .size kernel0, .Lfunc_end0-kernel0 - -.globl kernel1 -.align 256 -.amdgpu_hsa_kernel kernel1 -kernel1: - s_endpgm - s_endpgm -.Lfunc_end1: - .size kernel1, .Lfunc_end1-kernel1 - - -# CHECK: Section { -# CHECK: Name: .text -# CHECK: Type: SHT_PROGBITS -# CHECK: Flags [ (0x6) -# CHECK: SHF_ALLOC (0x2) -# CHECK: SHF_EXECINSTR (0x4) -# CHECK: ] -# CHECK: } - -# CHECK: ProgramHeader { -# CHECK: Type: PT_LOAD - -# CHECK: Symbol { -# CHECK: Name: kernel0 -# CHECK: Value: -# CHECK: Size: 4 -# CHECK: Binding: Global -# CHECK: Type: AMDGPU_HSA_KERNEL -# CHECK: Section: .text -# CHECK: } - -# CHECK: Symbol { -# CHECK: Name: kernel1 -# CHECK: Value: -# CHECK: Size: 8 -# CHECK: Binding: Global -# CHECK: Type: AMDGPU_HSA_KERNEL -# CHECK: Section: .text -# CHECK: } diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1622,8 +1622,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. warning:: - Code object V2 is not the default code object version emitted by - this version of LLVM. + Code object V2 generation is no longer supported by this version of LLVM. The AMDGPU backend code object uses the following ELF note record in the ``.note`` section when compiling for code object V2. @@ -2891,8 +2890,7 @@ +++++++++++++++++++++++ .. warning:: - Code object V2 is not the default code object version emitted by this version - of LLVM. + Code object V2 generation is no longer supported by this version of LLVM. Code object V2 metadata is specified by the ``NT_AMD_HSA_METADATA`` note record (see :ref:`amdgpu-note-records-v2`). @@ -14852,8 +14850,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. warning:: - Code object V2 is not the default code object version emitted by - this version of LLVM. + Code object V2 generation is no longer supported by this version of LLVM. The AMDGPU assembler defines and updates some symbols automatically. These symbols do not affect code generation. @@ -14908,8 +14905,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~ .. warning:: - Code object V2 is not the default code object version emitted by - this version of LLVM. + Code object V2 generation is no longer supported by this version of LLVM. AMDGPU ABI defines auxiliary data in output code object. In assembly source, one can specify them with assembler directives. @@ -14984,8 +14980,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. warning:: - Code Object V2 is not the default code object version emitted by - this version of LLVM. + Code object V2 generation is no longer supported by this version of LLVM. Here is an example of a minimal assembly source file, defining one HSA kernel: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -121,26 +121,13 @@ TM.getTargetTriple().getOS() != Triple::AMDPAL) return; - if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3) - getTargetStreamer()->EmitDirectiveAMDGCNTarget(); + getTargetStreamer()->EmitDirectiveAMDGCNTarget(); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID()); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - - if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3) - return; - - // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); - - // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2. - IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU()); - getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2( - Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); } void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -148,8 +135,7 @@ if (!IsTargetStreamerInitialized) initTargetStreamer(M); - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - CodeObjectVersion == AMDGPU::AMDHSA_COV2) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) getTargetStreamer()->EmitISAVersion(); // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). @@ -209,7 +195,7 @@ if (!MFI.isEntryFunction()) return; - if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) && + if (STM.isMesaKernel(F) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { amd_kernel_code_t KernelCode; @@ -226,8 +212,7 @@ if (!MFI.isEntryFunction()) return; - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - CodeObjectVersion == AMDGPU::AMDHSA_COV2) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; auto &Streamer = getTargetStreamer()->getStreamer(); @@ -261,8 +246,7 @@ } void AMDGPUAsmPrinter::emitFunctionEntryLabel() { - if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - CodeObjectVersion >= AMDGPU::AMDHSA_COV3) { + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { AsmPrinter::emitFunctionEntryLabel(); return; } @@ -337,9 +321,6 @@ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV2: - HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); - break; case AMDGPU::AMDHSA_COV3: HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -159,82 +159,6 @@ ~MetadataStreamerMsgPackV5() = default; }; -// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2. -class MetadataStreamerYamlV2 final : public MetadataStreamer { -private: - Metadata HSAMetadata; - - void dump(StringRef HSAMetadataString) const; - - void verify(StringRef HSAMetadataString) const; - - AccessQualifier getAccessQualifier(StringRef AccQual) const; - - AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const; - - ValueKind getValueKind(Type *Ty, StringRef TypeQual, - StringRef BaseTypeName) const; - - std::string getTypeName(Type *Ty, bool Signed) const; - - std::vector getWorkGroupDimensions(MDNode *Node) const; - - Kernel::CodeProps::Metadata getHSACodeProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const; - Kernel::DebugProps::Metadata getHSADebugProps( - const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const; - - void emitPrintf(const Module &Mod); - - void emitKernelLanguage(const Function &Func); - - void emitKernelAttrs(const Function &Func); - - void emitKernelArgs(const Function &Func, const GCNSubtarget &ST); - - void emitKernelArg(const Argument &Arg); - - void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment, - ValueKind ValueKind, - MaybeAlign PointeeAlign = std::nullopt, - StringRef Name = "", StringRef TypeName = "", - StringRef BaseTypeName = "", StringRef AccQual = "", - StringRef TypeQual = ""); - - void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST); - - const Metadata &getHSAMetadata() const { - return HSAMetadata; - } - -protected: - void emitVersion() override; - void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, - msgpack::ArrayDocNode Args) override { - llvm_unreachable("Dummy override should not be invoked!"); - } - void emitKernelAttrs(const Function &Func, - msgpack::MapDocNode Kern) override { - llvm_unreachable("Dummy override should not be invoked!"); - } - -public: - MetadataStreamerYamlV2() = default; - ~MetadataStreamerYamlV2() = default; - - bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; - - void begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) override; - - void end() override; - - void emitKernel(const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) override; -}; - } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -48,435 +48,6 @@ namespace AMDGPU { namespace HSAMD { -//===----------------------------------------------------------------------===// -// HSAMetadataStreamerV2 -//===----------------------------------------------------------------------===// -void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const { - errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; -} - -void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const { - errs() << "AMDGPU HSA Metadata Parser Test: "; - - HSAMD::Metadata FromHSAMetadataString; - if (fromString(HSAMetadataString, FromHSAMetadataString)) { - errs() << "FAIL\n"; - return; - } - - std::string ToHSAMetadataString; - if (toString(FromHSAMetadataString, ToHSAMetadataString)) { - errs() << "FAIL\n"; - return; - } - - errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") - << '\n'; - if (HSAMetadataString != ToHSAMetadataString) { - errs() << "Original input: " << HSAMetadataString << '\n' - << "Produced output: " << ToHSAMetadataString << '\n'; - } -} - -AccessQualifier -MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const { - if (AccQual.empty()) - return AccessQualifier::Unknown; - - return StringSwitch(AccQual) - .Case("read_only", AccessQualifier::ReadOnly) - .Case("write_only", AccessQualifier::WriteOnly) - .Case("read_write", AccessQualifier::ReadWrite) - .Default(AccessQualifier::Default); -} - -AddressSpaceQualifier -MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const { - switch (AddressSpace) { - case AMDGPUAS::PRIVATE_ADDRESS: - return AddressSpaceQualifier::Private; - case AMDGPUAS::GLOBAL_ADDRESS: - return AddressSpaceQualifier::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return AddressSpaceQualifier::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return AddressSpaceQualifier::Local; - case AMDGPUAS::FLAT_ADDRESS: - return AddressSpaceQualifier::Generic; - case AMDGPUAS::REGION_ADDRESS: - return AddressSpaceQualifier::Region; - default: - return AddressSpaceQualifier::Unknown; - } -} - -ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual, - StringRef BaseTypeName) const { - if (TypeQual.contains("pipe")) - return ValueKind::Pipe; - - return StringSwitch(BaseTypeName) - .Case("image1d_t", ValueKind::Image) - .Case("image1d_array_t", ValueKind::Image) - .Case("image1d_buffer_t", ValueKind::Image) - .Case("image2d_t", ValueKind::Image) - .Case("image2d_array_t", ValueKind::Image) - .Case("image2d_array_depth_t", ValueKind::Image) - .Case("image2d_array_msaa_t", ValueKind::Image) - .Case("image2d_array_msaa_depth_t", ValueKind::Image) - .Case("image2d_depth_t", ValueKind::Image) - .Case("image2d_msaa_t", ValueKind::Image) - .Case("image2d_msaa_depth_t", ValueKind::Image) - .Case("image3d_t", ValueKind::Image) - .Case("sampler_t", ValueKind::Sampler) - .Case("queue_t", ValueKind::Queue) - .Default(isa(Ty) ? - (Ty->getPointerAddressSpace() == - AMDGPUAS::LOCAL_ADDRESS ? - ValueKind::DynamicSharedPointer : - ValueKind::GlobalBuffer) : - ValueKind::ByValue); -} - -std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const { - switch (Ty->getTypeID()) { - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getTypeName(Ty, true)).str(); - - auto BitWidth = Ty->getIntegerBitWidth(); - switch (BitWidth) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BitWidth)).str(); - } - } - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::FixedVectorTyID: { - auto VecTy = cast(Ty); - auto ElTy = VecTy->getElementType(); - auto NumElements = VecTy->getNumElements(); - return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); - } - default: - return "unknown"; - } -} - -std::vector -MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const { - std::vector Dims; - if (Node->getNumOperands() != 3) - return Dims; - - for (auto &Op : Node->operands()) - Dims.push_back(mdconst::extract(Op)->getZExtValue()); - return Dims; -} - -Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { - const GCNSubtarget &STM = MF.getSubtarget(); - const SIMachineFunctionInfo &MFI = *MF.getInfo(); - HSAMD::Kernel::CodeProps::Metadata HSACodeProps; - const Function &F = MF.getFunction(); - - assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || - F.getCallingConv() == CallingConv::SPIR_KERNEL); - - Align MaxKernArgAlign; - HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, - MaxKernArgAlign); - HSACodeProps.mKernargSegmentAlign = - std::max(MaxKernArgAlign, Align(4)).value(); - - HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; - HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; - HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); - HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; - HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; - HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); - HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; - HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); - HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); - HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); - - return HSACodeProps; -} - -Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { - return HSAMD::Kernel::DebugProps::Metadata(); -} - -void MetadataStreamerYamlV2::emitVersion() { - auto &Version = HSAMetadata.mVersion; - - Version.push_back(VersionMajorV2); - Version.push_back(VersionMinorV2); -} - -void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) { - auto &Printf = HSAMetadata.mPrintf; - - auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); - if (!Node) - return; - - for (auto *Op : Node->operands()) - if (Op->getNumOperands()) - Printf.push_back( - std::string(cast(Op->getOperand(0))->getString())); -} - -void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) { - auto &Kernel = HSAMetadata.mKernels.back(); - - // TODO: What about other languages? - auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); - if (!Node || !Node->getNumOperands()) - return; - auto Op0 = Node->getOperand(0); - if (Op0->getNumOperands() <= 1) - return; - - Kernel.mLanguage = "OpenCL C"; - Kernel.mLanguageVersion.push_back( - mdconst::extract(Op0->getOperand(0))->getZExtValue()); - Kernel.mLanguageVersion.push_back( - mdconst::extract(Op0->getOperand(1))->getZExtValue()); -} - -void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) { - auto &Attrs = HSAMetadata.mKernels.back().mAttrs; - - if (auto Node = Func.getMetadata("reqd_work_group_size")) - Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); - if (auto Node = Func.getMetadata("work_group_size_hint")) - Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); - if (auto Node = Func.getMetadata("vec_type_hint")) { - Attrs.mVecTypeHint = getTypeName( - cast(Node->getOperand(0))->getType(), - mdconst::extract(Node->getOperand(1))->getZExtValue()); - } - if (Func.hasFnAttribute("runtime-handle")) { - Attrs.mRuntimeHandle = - Func.getFnAttribute("runtime-handle").getValueAsString().str(); - } -} - -void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func, - const GCNSubtarget &ST) { - for (auto &Arg : Func.args()) - emitKernelArg(Arg); - - emitHiddenKernelArgs(Func, ST); -} - -void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) { - auto Func = Arg.getParent(); - auto ArgNo = Arg.getArgNo(); - const MDNode *Node; - - StringRef Name; - Node = Func->getMetadata("kernel_arg_name"); - if (Node && ArgNo < Node->getNumOperands()) - Name = cast(Node->getOperand(ArgNo))->getString(); - else if (Arg.hasName()) - Name = Arg.getName(); - - StringRef TypeName; - Node = Func->getMetadata("kernel_arg_type"); - if (Node && ArgNo < Node->getNumOperands()) - TypeName = cast(Node->getOperand(ArgNo))->getString(); - - StringRef BaseTypeName; - Node = Func->getMetadata("kernel_arg_base_type"); - if (Node && ArgNo < Node->getNumOperands()) - BaseTypeName = cast(Node->getOperand(ArgNo))->getString(); - - StringRef AccQual; - if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && - Arg.hasNoAliasAttr()) { - AccQual = "read_only"; - } else { - Node = Func->getMetadata("kernel_arg_access_qual"); - if (Node && ArgNo < Node->getNumOperands()) - AccQual = cast(Node->getOperand(ArgNo))->getString(); - } - - StringRef TypeQual; - Node = Func->getMetadata("kernel_arg_type_qual"); - if (Node && ArgNo < Node->getNumOperands()) - TypeQual = cast(Node->getOperand(ArgNo))->getString(); - - const DataLayout &DL = Func->getParent()->getDataLayout(); - - MaybeAlign PointeeAlign; - if (auto PtrTy = dyn_cast(Arg.getType())) { - if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { - // FIXME: Should report this for all address spaces - PointeeAlign = Arg.getParamAlign().valueOrOne(); - } - } - - Type *ArgTy; - Align ArgAlign; - std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); - - emitKernelArg(DL, ArgTy, ArgAlign, - getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name, - TypeName, BaseTypeName, AccQual, TypeQual); -} - -void MetadataStreamerYamlV2::emitKernelArg( - const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind, - MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, - StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { - HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); - auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); - - Arg.mName = std::string(Name); - Arg.mTypeName = std::string(TypeName); - Arg.mSize = DL.getTypeAllocSize(Ty); - Arg.mAlign = Alignment.value(); - Arg.mValueKind = ValueKind; - Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0; - - if (auto PtrTy = dyn_cast(Ty)) - Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); - - Arg.mAccQual = getAccessQualifier(AccQual); - - // TODO: Emit Arg.mActualAccQual. - - SmallVector SplitTypeQuals; - TypeQual.split(SplitTypeQuals, " ", -1, false); - for (StringRef Key : SplitTypeQuals) { - auto P = StringSwitch(Key) - .Case("const", &Arg.mIsConst) - .Case("restrict", &Arg.mIsRestrict) - .Case("volatile", &Arg.mIsVolatile) - .Case("pipe", &Arg.mIsPipe) - .Default(nullptr); - if (P) - *P = true; - } -} - -void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func, - const GCNSubtarget &ST) { - unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); - if (!HiddenArgNumBytes) - return; - - auto &DL = Func.getParent()->getDataLayout(); - auto Int64Ty = Type::getInt64Ty(Func.getContext()); - - if (HiddenArgNumBytes >= 8) - emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX); - if (HiddenArgNumBytes >= 16) - emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY); - if (HiddenArgNumBytes >= 24) - emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ); - - auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), - AMDGPUAS::GLOBAL_ADDRESS); - - if (HiddenArgNumBytes >= 32) { - // We forbid the use of features requiring hostcall when compiling OpenCL - // before code object V5, which makes the mutual exclusion between the - // "printf buffer" and "hostcall buffer" here sound. - if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); - else - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); - } - - // Emit "default queue" and "completion action" arguments if enqueue kernel is - // used, otherwise emit dummy "none" arguments. - if (HiddenArgNumBytes >= 40) { - if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue); - } else { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); - } - } - - if (HiddenArgNumBytes >= 48) { - if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction); - } else { - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); - } - } - - // Emit the pointer argument for multi-grid object. - if (HiddenArgNumBytes >= 56) { - if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg); - else - emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); - } -} - -bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { - return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); -} - -void MetadataStreamerYamlV2::begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) { - emitVersion(); - emitPrintf(Mod); -} - -void MetadataStreamerYamlV2::end() { - std::string HSAMetadataString; - if (toString(HSAMetadata, HSAMetadataString)) - return; - - if (DumpHSAMetadata) - dump(HSAMetadataString); - if (VerifyHSAMetadata) - verify(HSAMetadataString); -} - -void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) { - auto &Func = MF.getFunction(); - if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) - return; - - auto CodeProps = getHSACodeProps(MF, ProgramInfo); - auto DebugProps = getHSADebugProps(MF, ProgramInfo); - - HSAMetadata.mKernels.push_back(Kernel::Metadata()); - auto &Kernel = HSAMetadata.mKernels.back(); - - const GCNSubtarget &ST = MF.getSubtarget(); - Kernel.mName = std::string(Func.getName()); - Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); - emitKernelLanguage(Func); - emitKernelAttrs(Func); - emitKernelArgs(Func, ST); - HSAMetadata.mKernels.back().mCodeProps = CodeProps; - HSAMetadata.mKernels.back().mDebugProps = DebugProps; -} - //===----------------------------------------------------------------------===// // HSAMetadataStreamerV3 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1344,7 +1344,7 @@ // AsmParser::parseDirectiveSet() cannot be specialized for specific target. AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); MCContext &Ctx = getContext(); - if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbi(getSTI())) { MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); @@ -1361,7 +1361,7 @@ Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } - if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbi(getSTI())) { initializeGprCountSymbol(IS_VGPR); initializeGprCountSymbol(IS_SGPR); } else @@ -2855,7 +2855,7 @@ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { return nullptr; } - if (isHsaAbiVersion3AndAbove(&getSTI())) { + if (isHsaAbi(getSTI())) { if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) return nullptr; } else @@ -4888,7 +4888,7 @@ if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) return TokError("directive only supported for amdgcn architecture"); - if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) + if (!isHsaAbi(getSTI())) return TokError("directive only supported for amdhsa OS"); StringRef KernelName; @@ -5419,19 +5419,14 @@ } bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { - const char *AssemblerDirectiveBegin; - const char *AssemblerDirectiveEnd; - std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = - isHsaAbiVersion3AndAbove(&getSTI()) - ? std::pair(HSAMD::V3::AssemblerDirectiveBegin, - HSAMD::V3::AssemblerDirectiveEnd) - : std::pair(HSAMD::AssemblerDirectiveBegin, - HSAMD::AssemblerDirectiveEnd); - - if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { - return Error(getLoc(), - (Twine(AssemblerDirectiveBegin) + Twine(" directive is " - "not available on non-amdhsa OSes")).str()); + const char *AssemblerDirectiveBegin = HSAMD::V3::AssemblerDirectiveBegin; + const char *AssemblerDirectiveEnd = HSAMD::V3::AssemblerDirectiveEnd; + + if (!isHsaAbi(getSTI())) { + return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + + Twine(" directive is " + "not available on non-amdhsa OSes")) + .str()); } std::string HSAMetadataString; @@ -5439,13 +5434,8 @@ HSAMetadataString)) return true; - if (isHsaAbiVersion3AndAbove(&getSTI())) { - if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) - return Error(getLoc(), "invalid HSA metadata"); - } else { - if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) - return Error(getLoc(), "invalid HSA metadata"); - } + if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) + return Error(getLoc(), "invalid HSA metadata"); return false; } @@ -5588,7 +5578,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if (isHsaAbiVersion3AndAbove(&getSTI())) { + if (isHsaAbi(getSTI())) { if (IDVal == ".amdhsa_kernel") return ParseDirectiveAMDHSAKernel(); @@ -7704,7 +7694,7 @@ // TODO: Should try to check code object version from directive??? AMDGPU::getAmdhsaCodeObjectVersion()); - if (isHsaAbiVersion3AndAbove(&getSTI())) + if (isHsaAbi(getSTI())) getTargetStreamer().EmitDirectiveAMDGCNTarget(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -418,8 +418,6 @@ switch (CodeObjectVersion) { default: break; - case AMDGPU::AMDHSA_COV2: - break; case AMDGPU::AMDHSA_COV3: case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: @@ -539,7 +537,7 @@ unsigned NoteFlags = 0; // TODO Apparently, this is currently needed for OpenCL as mentioned in // https://reviews.llvm.org/D74995 - if (STI.getTargetTriple().getOS() == Triple::AMDHSA) + if (isHsaAbi(STI)) NoteFlags = ELF::SHF_ALLOC; S.pushSection(); @@ -598,11 +596,10 @@ } unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { - assert(STI.getTargetTriple().getOS() == Triple::AMDHSA); + assert(isHsaAbi(STI)); if (std::optional HsaAbiVer = getHsaAbiVersion(&STI)) { switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return getEFlagsV3(); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -43,17 +43,15 @@ struct IsaVersion; enum { - AMDHSA_COV2 = 2, AMDHSA_COV3 = 3, AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; +/// \returns True if \p STI is AMDHSA. +bool isHsaAbi(const MCSubtargetInfo &STI); /// \returns HSA OS ABI Version identification. std::optional getHsaAbiVersion(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 2, -/// false otherwise. -bool isHsaAbiVersion2(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 3, /// false otherwise. bool isHsaAbiVersion3(const MCSubtargetInfo *STI); @@ -63,9 +61,6 @@ /// \returns True if HSA OS ABI Version identification is 5, /// false otherwise. bool isHsaAbiVersion5(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 3 and above, -/// false otherwise. -bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -118,13 +118,16 @@ namespace AMDGPU { +/// \returns True if \p STI is AMDHSA. +bool isHsaAbi(const MCSubtargetInfo &STI) { + return STI.getTargetTriple().getOS() == Triple::AMDHSA; +} + std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA) return std::nullopt; switch (AmdhsaCodeObjectVersion) { - case 2: - return ELF::ELFABIVERSION_AMDGPU_HSA_V2; case 3: return ELF::ELFABIVERSION_AMDGPU_HSA_V3; case 4: @@ -137,12 +140,6 @@ } } -bool isHsaAbiVersion2(const MCSubtargetInfo *STI) { - if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) - return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2; - return false; -} - bool isHsaAbiVersion3(const MCSubtargetInfo *STI) { if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3; @@ -161,11 +158,6 @@ return false; } -bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) { - return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) || - isHsaAbiVersion5(STI); -} - unsigned getAmdhsaCodeObjectVersion() { return AmdhsaCodeObjectVersion; } @@ -182,7 +174,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV2: case AMDHSA_COV3: case AMDHSA_COV4: return 48; @@ -197,7 +188,6 @@ // central TD file. unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV2: case AMDHSA_COV3: case AMDHSA_COV4: return 24; @@ -209,7 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV2: case AMDHSA_COV3: case AMDHSA_COV4: return 32; @@ -221,7 +210,6 @@ unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV2: case AMDHSA_COV3: case AMDHSA_COV4: return 40; @@ -764,54 +752,6 @@ std::string Features; if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV2: - // Code object V2 only supported specific processors and had fixed - // settings for the XNACK. - if (Processor == "gfx600") { - } else if (Processor == "gfx601") { - } else if (Processor == "gfx602") { - } else if (Processor == "gfx700") { - } else if (Processor == "gfx701") { - } else if (Processor == "gfx702") { - } else if (Processor == "gfx703") { - } else if (Processor == "gfx704") { - } else if (Processor == "gfx705") { - } else if (Processor == "gfx801") { - if (!isXnackOnOrAny()) - report_fatal_error( - "AMD GPU code object V2 does not support processor " + - Twine(Processor) + " without XNACK"); - } else if (Processor == "gfx802") { - } else if (Processor == "gfx803") { - } else if (Processor == "gfx805") { - } else if (Processor == "gfx810") { - if (!isXnackOnOrAny()) - report_fatal_error( - "AMD GPU code object V2 does not support processor " + - Twine(Processor) + " without XNACK"); - } else if (Processor == "gfx900") { - if (isXnackOnOrAny()) - Processor = "gfx901"; - } else if (Processor == "gfx902") { - if (isXnackOnOrAny()) - Processor = "gfx903"; - } else if (Processor == "gfx904") { - if (isXnackOnOrAny()) - Processor = "gfx905"; - } else if (Processor == "gfx906") { - if (isXnackOnOrAny()) - Processor = "gfx907"; - } else if (Processor == "gfx90c") { - if (isXnackOnOrAny()) - report_fatal_error( - "AMD GPU code object V2 does not support processor " + - Twine(Processor) + " with XNACK being ON or ANY"); - } else { - report_fatal_error( - "AMD GPU code object V2 does not support processor " + - Twine(Processor)); - } - break; case AMDGPU::AMDHSA_COV3: // xnack. if (isXnackOnOrAny()) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll @@ -3,12 +3,10 @@ declare i64 @llvm.amdgcn.dispatch.id() #1 ; GCN-LABEL: {{^}}dispatch_id: -; GCN: .amd_kernel_code_t -; GCN: enable_sgpr_dispatch_id = 1 - ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] +; GCN: .amdhsa_user_sgpr_dispatch_id 1 define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 { %tmp0 = call i64 @llvm.amdgcn.dispatch.id() store i64 %tmp0, ptr addrspace(1) %out @@ -19,4 +17,4 @@ attributes #1 = { nounwind readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll @@ -3,8 +3,8 @@ ; FIXME: Error on non-HSA target ; GCN-LABEL: {{^}}test: -; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +; GCN: .amdhsa_user_sgpr_dispatch_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) { %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 %value = load i32, ptr addrspace(4) %dispatch_ptr @@ -17,4 +17,4 @@ attributes #0 = { readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,15 +1,15 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s ; ALL-LABEL: {{^}}test: -; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 8 -; HSA: kernarg_segment_alignment = 4 - -; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa +; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V4: s_load_dword s{{[0-9]+}}, s[4:5], 0xa ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa + +; HSA: .amdhsa_kernarg_size 8 +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 { %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 @@ -19,13 +19,14 @@ } ; ALL-LABEL: {{^}}test_implicit: -; HSA: kernarg_segment_byte_size = 8 ; OS-MESA3D: kernarg_segment_byte_size = 24 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_alignment = 4 ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15 + +; HSA: .amdhsa_kernarg_size 8 define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10 @@ -35,16 +36,16 @@ } ; ALL-LABEL: {{^}}test_implicit_alignment: -; HSA: kernarg_segment_byte_size = 12 ; OS-MESA3D: kernarg_segment_byte_size = 28 -; CO-V2: kernarg_segment_alignment = 4 - +; OS-MESA3D: kernarg_segment_alignment = 4 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] + +; HSA: .amdhsa_kernarg_size 12 define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load i32, ptr addrspace(4) %implicitarg.ptr @@ -53,16 +54,16 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment -; HSA: kernarg_segment_byte_size = 64 ; OS-MESA3D: kernarg_segment_byte_size = 28 -; CO-V2: kernarg_segment_alignment = 4 - +; OS-MESA3D: kernarg_segment_alignment = 4 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] + +; HSA: .amdhsa_kernarg_size 64 define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load i32, ptr addrspace(4) %implicitarg.ptr @@ -71,12 +72,15 @@ } ; ALL-LABEL: {{^}}test_no_kernargs: -; CO-V2: enable_sgpr_kernarg_segment_ptr = 0 -; CO-V2: kernarg_segment_byte_size = 0 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0 +; OS-MESA3D: kernarg_segment_byte_size = 0 +; OS-MESA3D: kernarg_segment_alignment = 4 ; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}} ; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]] + +; HSA: .amdhsa_kernarg_size 0 +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0 define amdgpu_kernel void @test_no_kernargs() #1 { %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 @@ -86,9 +90,9 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs: -; HSA: kernarg_segment_byte_size = 48 ; OS-MESA3D: kernarg_segment_byte_size = 16 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_alignment = 4 +; HSA: .amdhsa_kernarg_size 48 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -97,9 +101,9 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up: -; HSA: kernarg_segment_byte_size = 40 ; OS-MESA3D: kernarg_segment_byte_size = 16 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_alignment = 4 +; HSA: .amdhsa_kernarg_size 40 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -124,4 +128,4 @@ attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll @@ -3,8 +3,8 @@ ; FIXME: Error on non-hsa target ; GCN-LABEL: {{^}}test: -; GCN: enable_sgpr_queue_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +; GCN: .amdhsa_user_sgpr_queue_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) { %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 %value = load i32, ptr addrspace(4) %queue_ptr @@ -17,4 +17,4 @@ attributes #0 = { nounwind readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,7 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -11,25 +9,25 @@ ; ALL-LABEL: {{^}}test_workgroup_id_x: -; CO-V2: .amd_kernel_code_t -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 0 -; CO-V2: enable_sgpr_workgroup_id_z = 0 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_vgpr_workitem_id = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; CO-V2: .end_amd_kernel_code_t +; MESA3D: .amd_kernel_code_t +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 0 +; MESA3D: enable_sgpr_workgroup_id_z = 0 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_vgpr_workitem_id = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: .end_amd_kernel_code_t ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} -; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} +; MESA3D: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -41,22 +39,22 @@ } ; ALL-LABEL: {{^}}test_workgroup_id_y: -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 1 -; CO-V2: enable_sgpr_workgroup_id_z = 0 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 1 +; MESA3D: enable_sgpr_workgroup_id_z = 0 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -68,30 +66,30 @@ } ; ALL-LABEL: {{^}}test_workgroup_id_z: -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 0 -; CO-V2: enable_sgpr_workgroup_id_z = 1 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_vgpr_workitem_id = 0 -; CO-V2: enable_sgpr_private_segment_buffer = 1 -; CO-V2: enable_sgpr_dispatch_ptr = 0 -; CO-V2: enable_sgpr_queue_ptr = 0 -; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 -; CO-V2: enable_sgpr_dispatch_id = 0 -; CO-V2: enable_sgpr_flat_scratch_init = 0 -; CO-V2: enable_sgpr_private_segment_size = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 0 +; MESA3D: enable_sgpr_workgroup_id_z = 1 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_vgpr_workitem_id = 0 +; MESA3D: enable_sgpr_private_segment_buffer = 1 +; MESA3D: enable_sgpr_dispatch_ptr = 0 +; MESA3D: enable_sgpr_queue_ptr = 0 +; MESA3D: enable_sgpr_kernarg_segment_ptr = 1 +; MESA3D: enable_sgpr_dispatch_id = 0 +; MESA3D: enable_sgpr_flat_scratch_init = 0 +; MESA3D: enable_sgpr_private_segment_size = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 @@ -106,4 +104,4 @@ attributes #1 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,9 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -16,7 +14,7 @@ ; MESA-NEXT: .long 132{{$}} ; ALL-LABEL: {{^}}test_workitem_id_x: -; CO-V2: enable_vgpr_workitem_id = 0 +; MESA3D: enable_vgpr_workitem_id = 0 ; ALL-NOT: v0 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 @@ -33,9 +31,9 @@ ; MESA-NEXT: .long 2180{{$}} ; ALL-LABEL: {{^}}test_workitem_id_y: -; CO-V2: enable_vgpr_workitem_id = 1 -; CO-V2-NOT: v1 -; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1 +; MESA3D: enable_vgpr_workitem_id = 1 +; MESA3D-NOT: v1 +; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] @@ -51,9 +49,9 @@ ; MESA-NEXT: .long 4228{{$}} ; ALL-LABEL: {{^}}test_workitem_id_z: -; CO-V2: enable_vgpr_workitem_id = 2 -; CO-V2-NOT: v2 -; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2 +; MESA3D: enable_vgpr_workitem_id = 2 +; MESA3D-NOT: v2 +; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] @@ -129,7 +127,7 @@ ; FIXME: Should be able to avoid enabling in kernel inputs ; FIXME: Packed tid should avoid the and ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: -; CO-V2: enable_vgpr_workitem_id = 0 +; MESA3D: enable_vgpr_workitem_id = 0 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 @@ -150,7 +148,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: -; CO-V2: enable_vgpr_workitem_id = 1 +; MESA3D: enable_vgpr_workitem_id = 1 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] @@ -172,7 +170,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; MESA3D: enable_vgpr_workitem_id = 2 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] @@ -200,4 +198,4 @@ !2 = !{i32 1, i32 1, i32 64} !llvm.module.flags = !{!99} -!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -2,10 +2,6 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; CI: enable_sgpr_queue_ptr = 1 -; GFX9: enable_sgpr_queue_ptr = 0 ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} @@ -24,6 +20,11 @@ ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] +; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .amdhsa_user_sgpr_dispatch_ptr 0 +; CI: .amdhsa_user_sgpr_queue_ptr 1 +; GFX9: .amdhsa_user_sgpr_queue_ptr 0 + ; At most 2 digits. Make sure src_shared_base is not counted as a high ; number SGPR. @@ -59,10 +60,6 @@ } ; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast: -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; CI: enable_sgpr_queue_ptr = 1 -; GFX9: enable_sgpr_queue_ptr = 0 ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}} @@ -82,6 +79,11 @@ ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] +; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .amdhsa_user_sgpr_dispatch_ptr 0 +; CI: .amdhsa_user_sgpr_queue_ptr 1 +; GFX9: .amdhsa_user_sgpr_queue_ptr 0 + ; HSA: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { %stof = addrspacecast ptr addrspace(5) %ptr to ptr @@ -91,13 +93,14 @@ ; no-op ; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast: -; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]] + +; HSA: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 { %stof = addrspacecast ptr addrspace(1) %ptr to ptr store volatile i32 7, ptr %stof @@ -131,9 +134,6 @@ } ; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast: -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]] ; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} @@ -146,6 +146,10 @@ ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]] ; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]] + +; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .amdhsa_user_sgpr_dispatch_ptr 0 +; HSA: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 { %ftos = addrspacecast ptr %ptr to ptr addrspace(3) store volatile i32 0, ptr addrspace(3) %ftos @@ -153,9 +157,6 @@ } ; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast: -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]] ; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} @@ -171,6 +172,10 @@ ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] ; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} + +; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .amdhsa_user_sgpr_dispatch_ptr 0 +; HSA: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 { %ftos = addrspacecast ptr %ptr to ptr addrspace(5) store volatile i32 0, ptr addrspace(5) %ftos @@ -178,7 +183,6 @@ } ; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast: -; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0 ; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] @@ -188,6 +192,8 @@ ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}} + +; HSA: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 { %ftos = addrspacecast ptr %ptr to ptr addrspace(1) store volatile i32 0, ptr addrspace(1) %ftos @@ -195,10 +201,11 @@ } ; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast: -; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0 ; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0 + +; HSA: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 { %ftos = addrspacecast ptr %ptr to ptr addrspace(4) load volatile i32, ptr addrspace(4) %ftos @@ -279,13 +286,14 @@ ; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast: -; CI: enable_sgpr_queue_ptr = 1 -; GFX9: enable_sgpr_queue_ptr = 0 ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]] + +; CI: .amdhsa_user_sgpr_queue_ptr 1 +; GFX9: .amdhsa_user_sgpr_queue_ptr 0 define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 { %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr store volatile i32 7, ptr %cast @@ -416,4 +424,4 @@ attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,16 +1,16 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s +; RUN: opt < %s -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s +; RUN: opt < %s -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC ; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4 ; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] poison, align 16 @@ -24,9 +24,7 @@ ; R600: LDS_READ ; R600: LDS_READ -; HSA-PROMOTE: .amd_kernel_code_t -; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120 -; HSA-PROMOTE: .end_amd_kernel_code_t +; HSA-PROMOTE: .amdhsa_group_segment_fixed_size 5120 ; HSA-PROMOTE: s_load_dwordx2 s[{{[0-9:]+}}], s[4:5], 0x1 @@ -35,14 +33,12 @@ ; SI-PROMOTE: ds_read_b32 ; SI-PROMOTE: ds_read_b32 -; HSA-ALLOCA: .amd_kernel_code_t ; FIXME: Creating the emergency stack slots causes us to over-estimate scratch ; by 4 bytes. -; HSA-ALLOCA: workitem_private_segment_byte_size = 24 -; HSA-ALLOCA: .end_amd_kernel_code_t +; HSA-ALLOCA: .amdhsa_private_segment_fixed_size 24 -; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7 ; HSA-ALLOCA: s_add_i32 s6, s6, s9 +; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7 ; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0 @@ -534,7 +530,7 @@ attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" } !llvm.module.flags = !{!99} -!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 400} ; HSAOPT: !1 = !{} ; HSAOPT: !2 = !{i32 0, i32 257} diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -130,16 +130,14 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} -; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata) -; HSAMD: Version: [ 1, 0 ] -; HSAMD: Kernels: -; HSAMD: - Name: min_64_max_64 -; HSAMD: MaxFlatWorkGroupSize: 64 -; HSAMD: - Name: min_64_max_128 -; HSAMD: MaxFlatWorkGroupSize: 128 -; HSAMD: - Name: min_128_max_128 -; HSAMD: MaxFlatWorkGroupSize: 128 -; HSAMD: - Name: min_1024_max_1024 -; HSAMD: MaxFlatWorkGroupSize: 1024 +; HSAMD: amdhsa.kernels +; HSAMD: .max_flat_workgroup_size: 64 +; HSAMD: .name: min_64_max_64 +; HSAMD: .max_flat_workgroup_size: 128 +; HSAMD: .name: min_64_max_128 +; HSAMD: .max_flat_workgroup_size: 128 +; HSAMD: .name: min_128_max_128 +; HSAMD: .max_flat_workgroup_size: 1024 +; HSAMD: .name: min_1024_max_1024 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,7 +1,7 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. @@ -32,7 +32,6 @@ } ; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel: -; GCN: is_dynamic_callstack = 0 ; CI: ; NumSgprs: 38 ; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 @@ -61,7 +60,6 @@ } ; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel: -; GCN: is_dynamic_callstack = 0 ; CI: ; NumSgprs: 38 ; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 @@ -87,7 +85,6 @@ } ; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr: -; GCN: is_dynamic_callstack = 0 ; GCN: ; NumVgprs: 41 define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 { call void @indirect_use_10_vgpr() @@ -123,7 +120,6 @@ } ; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr: -; GCN: is_dynamic_callstack = 0 ; CI: ; NumSgprs: 84 ; VI-NOBUG: ; NumSgprs: 86 ; VI-BUG: ; NumSgprs: 96 @@ -159,7 +155,6 @@ } ; GCN-LABEL: {{^}}indirect_2_level_use_stack: -; GCN: is_dynamic_callstack = 0 ; GCN: ScratchSize: 2132 define amdgpu_kernel void @indirect_2_level_use_stack() #0 { call void @indirect_use_stack() @@ -169,7 +164,6 @@ ; Should be maximum of callee usage ; GCN-LABEL: {{^}}multi_call_use_use_stack: -; GCN: is_dynamic_callstack = 0 ; GCN: ScratchSize: 2052 define amdgpu_kernel void @multi_call_use_use_stack() #0 { call void @use_stack0() @@ -181,7 +175,6 @@ declare void @external() #0 ; GCN-LABEL: {{^}}usage_external: -; GCN: is_dynamic_callstack = 1 ; NumSgprs: 48 ; NumVgprs: 24 ; GCN: ScratchSize: 16384 @@ -196,7 +189,6 @@ declare void @external_recurse() #2 ; GCN-LABEL: {{^}}usage_external_recurse: -; GCN: is_dynamic_callstack = 1 ; NumSgprs: 48 ; NumVgprs: 24 ; GCN: ScratchSize: 16384 @@ -229,9 +221,7 @@ } ; GCN-LABEL: {{^}}usage_direct_recursion: -; GCN: is_ptr64 = 1 -; GCN: is_dynamic_callstack = 1 -; GCN: workitem_private_segment_byte_size = 18448{{$}} +; GCN: .amdhsa_private_segment_fixed_size 18448 ; ; GCN-V5-LABEL: {{^}}usage_direct_recursion: ; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -111,7 +111,6 @@ } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x: -; GCN: enable_vgpr_workitem_id = 0 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v31 @@ -120,13 +119,14 @@ ; FIXEDABI-NOT: v31 ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 0 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 { call void @use_workitem_id_x() ret void } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y: -; GCN: enable_vgpr_workitem_id = 1 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 @@ -137,13 +137,14 @@ ; FIXEDABI-NOT: v2 ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 1 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 { call void @use_workitem_id_y() ret void } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z: -; GCN: enable_vgpr_workitem_id = 2 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 @@ -152,6 +153,8 @@ ; FIXEDABI-NOT: v1 ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 2 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 { call void @use_workitem_id_z() ret void @@ -284,13 +287,14 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x: -; GCN: enable_vgpr_workitem_id = 0 ; FIXEDABI-NOT: v0 ; FIXEDABI: v_mov_b32_e32 v31, v0 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 { call void @other_arg_use_workitem_id_x(i32 555) ret void @@ -298,26 +302,28 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y: -; GCN: enable_vgpr_workitem_id = 1 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 ; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b + +; GCN: .amdhsa_system_vgpr_workitem_id 1 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 { call void @other_arg_use_workitem_id_y(i32 555) ret void } ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: -; GCN: enable_vgpr_workitem_id = 2 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b + +; GCN: .amdhsa_system_vgpr_workitem_id 2 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { call void @other_arg_use_workitem_id_z(i32 555) ret void @@ -374,7 +380,6 @@ } ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x: -; GCN: enable_vgpr_workitem_id = 0 ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 @@ -385,6 +390,8 @@ ; FIXEDABI-DAG: v_mov_b32_e32 v31, v0 ; FIXEDABI: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 0 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 { call void @too_many_args_use_workitem_id_x( i32 10, i32 20, i32 30, i32 40, @@ -639,7 +646,6 @@ } ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz: -; GCN: enable_vgpr_workitem_id = 2 ; GCN-DAG: s_mov_b32 s32, 0 @@ -652,6 +658,8 @@ ; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 2 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 { call void @too_many_args_use_workitem_id_xyz( i32 10, i32 20, i32 30, i32 40, @@ -729,7 +737,6 @@ } ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz: -; GCN: enable_vgpr_workitem_id = 2 ; GCN-NOT: v0 ; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1 @@ -739,6 +746,8 @@ ; GCN: s_mov_b32 s32, 0 ; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_vgpr_workitem_id 2 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 { call void @too_many_args_use_workitem_id_x_stack_yz( i32 10, i32 20, i32 30, i32 40, @@ -804,4 +813,4 @@ attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -10,8 +10,6 @@ ; GCN-LABEL: {{^}}divergent_if_endif: -; VGPR: workitem_private_segment_byte_size = 16{{$}} - ; GCN: {{^}}; %bb.0: ; GCN: s_mov_b32 m0, -1 @@ -63,6 +61,8 @@ ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] + +; VGPR: .amdhsa_private_segment_fixed_size 16 define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -82,7 +82,6 @@ } ; GCN-LABEL: {{^}}divergent_loop: -; VGPR: workitem_private_segment_byte_size = 20{{$}} ; GCN: {{^}}; %bb.0: ; GCN-DAG: s_mov_b32 m0, -1 @@ -133,6 +132,8 @@ ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] + +; VGPR: .amdhsa_private_segment_fixed_size 20 define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -274,4 +275,4 @@ attributes #1 = { nounwind readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll --- a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll @@ -1,18 +1,15 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s ; There are no stack objects even though flat is used by default, so ; flat_scratch_init should be disabled. ; ALL-LABEL: {{^}}test: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: .end_amd_kernel_code_t ; ALL-NOT: flat_scr @@ -20,6 +17,8 @@ ; HSA-NODEFAULT: buffer_store_dword ; HSA-NOADDR64: flat_store_dword +; HSA: .amdhsa_user_sgpr_flat_scratch_init 0 + ; NOHSA-DEFAULT: buffer_store_dword ; NOHSA-NODEFAULT: flat_store_dword ; NOHSA-NOADDR64: flat_store_dword @@ -53,4 +52,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,32 +1,26 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s - -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s ; GCN-LABEL: {{^}}no_vcc_no_flat: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -44,9 +38,6 @@ ; GCN-LABEL: {{^}}vcc_no_flat: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -64,9 +55,6 @@ ; GCN-LABEL: {{^}}no_vcc_flat: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -84,9 +72,6 @@ ; GCN-LABEL: {{^}}vcc_flat: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -107,9 +92,6 @@ ; GCN-LABEL: {{^}}use_flat_scr: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -127,9 +109,6 @@ ; GCN-LABEL: {{^}}use_flat_scr_lo: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -147,9 +126,6 @@ ; GCN-LABEL: {{^}}use_flat_scr_hi: -; HSA-CI-V2: is_xnack_enabled = 0 -; HSA-VI-XNACK-V2: is_xnack_enabled = 1 - ; NOT-HSA-CI: .amdhsa_reserve_xnack_mask ; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0 ; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1 @@ -168,4 +144,4 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll --- a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -1,11 +1,10 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s -; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +; CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-" define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { store float 0.0, ptr addrspace(1) %out0 ret void } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} - +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll @@ -3,11 +3,11 @@ ; Make sure that with an HSA triple, we don't default to an ; unsupported device. -; CHECK: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" +; CHECK: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { store float 0.0, ptr addrspace(1) %out0 ret void } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -11,9 +11,9 @@ } ; GCN-LABEL: {{^}}test_default_vi: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -21,9 +21,9 @@ } ; GCN-LABEL: {{^}}test_f64_denormals: -; GCN: float_mode = 192 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 192 define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -31,9 +31,9 @@ } ; GCN-LABEL: {{^}}test_f32_denormals: -; GCN: float_mode = 48 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 48 define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #3 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -41,9 +41,9 @@ } ; GCN-LABEL: {{^}}test_f32_f64_denormals: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -51,9 +51,9 @@ } ; GCN-LABEL: {{^}}test_no_denormals: -; GCN: float_mode = 0 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 0 define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -61,9 +61,9 @@ } ; GCN-LABEL: {{^}}test_no_dx10_clamp_vi: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 0 -; GCN: enable_ieee_mode = 1 +; GCN: .amdhsa_dx10_clamp 0 +; GCN: .amdhsa_ieee_mode 1 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -71,9 +71,9 @@ } ; GCN-LABEL: {{^}}test_no_ieee_mode_vi: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 1 -; GCN: enable_ieee_mode = 0 +; GCN: .amdhsa_dx10_clamp 1 +; GCN: .amdhsa_ieee_mode 0 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -81,9 +81,9 @@ } ; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi: -; GCN: float_mode = 240 -; GCN: enable_dx10_clamp = 0 -; GCN: enable_ieee_mode = 0 +; GCN: .amdhsa_dx10_clamp 0 +; GCN: .amdhsa_ieee_mode 0 +; GCN: FloatMode: 240 define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #8 { store float 0.0, ptr addrspace(1) %out0 store double 0.0, ptr addrspace(1) %out1 @@ -101,4 +101,4 @@ attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=4 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. @@ -18,12 +18,13 @@ ; ELF: } ; ELF: SHT_NOTE -; ELF: 0000: 04000000 08000000 01000000 414D4400 -; ELF: 0010: 02000000 01000000 04000000 1B000000 - -; ELF: 0020: 03000000 414D4400 04000700 07000000 -; ELF: 0030: 00000000 00000000 414D4400 414D4447 -; ELF: 0040: 50550000 +; ELF: 0000: 07000000 4F000000 20000000 414D4447 +; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572 +; ELF: 0020: 6E656C73 90AD616D 64687361 2E746172 +; ELF: 0030: 676574BD 616D6467 636E2D75 6E6B6E6F +; ELF: 0040: 776E2D61 6D646873 612D2D67 66783730 +; ELF: 0050: 30AE616D 64687361 2E766572 73696F6E +; ELF: 0060: 92010100 ; ELF: Symbol { ; ELF: Name: simple @@ -32,9 +33,8 @@ ; ELF: } ; HSA: .text -; HSA: .hsa_code_object_version 2,1 -; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" +; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" ; HSA-NOT: .amdgpu_hsa_kernel simple ; HSA: .globl simple @@ -69,4 +69,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s - -; CHECK: - Name: test_ro_arg -; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: in -; CHECK-NEXT: TypeName: 'float*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: ReadOnly -; CHECK-NEXT: IsConst: true -; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: - Name: out -; CHECK-NEXT: TypeName: 'float*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default - -define amdgpu_kernel void @test_ro_arg(ptr addrspace(1) noalias readonly %in, ptr addrspace(1) %out) - !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 - !kernel_arg_base_type !2 !kernel_arg_type_qual !3 { - ret void -} - -!0 = !{i32 1, i32 1} -!1 = !{!"none", !"none"} -!2 = !{!"float*", !"float*"} -!3 = !{!"const restrict", !""} - -!llvm.module.flags = !{!99} -!99 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK-NOT: Printf: -; CHECK: Kernels: - -; CHECK: - Name: test_non_enqueue_kernel_caller -; CHECK-NEXT: SymbolName: 'test_non_enqueue_kernel_caller@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction -define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_enqueue_kernel_caller -; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } -attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"char"} -!4 = !{!""} - -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ /dev/null @@ -1,1931 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -%struct.A = type { i8, float } -%opencl.image1d_t = type opaque -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque -%opencl.queue_t = type opaque -%opencl.pipe_t = type opaque -%struct.B = type { ptr addrspace(1)} -%opencl.clk_event_t = type opaque - -@__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant ptr addrspace(1) - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: Printf: -; CHECK: - '1:1:4:%d\n' -; CHECK: - '2:1:8:%g\n' -; CHECK: Kernels: - -; CHECK: - Name: test_char -; CHECK-NEXT: SymbolName: 'test_char@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NOT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction -; CHECK: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_char(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 - !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_char_byref_constant -; CHECK-NEXT: SymbolName: 'test_char_byref_constant@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NOT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction -; CHECK: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_char_byref_constant(ptr addrspace(4) byref(i8) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 - !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_char_byref_constant_align512 -; CHECK-NEXT: SymbolName: 'test_char_byref_constant_align512@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 512 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NOT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction -; CHECK: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_char_byref_constant_align512(i8, ptr addrspace(4) byref(i8) align 512 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !111 - !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_ushort2 -; CHECK-NEXT: SymbolName: 'test_ushort2@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: ushort2 -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 - !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_int3 -; CHECK-NEXT: SymbolName: 'test_int3@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int3 -; CHECK-NEXT: Size: 16 -; CHECK-NEXT: Align: 16 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_int3(<3 x i32> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 - !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_ulong4 -; CHECK-NEXT: SymbolName: 'test_ulong4@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: ulong4 -; CHECK-NEXT: Size: 32 -; CHECK-NEXT: Align: 32 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 - !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_half8 -; CHECK-NEXT: SymbolName: 'test_half8@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: half8 -; CHECK-NEXT: Size: 16 -; CHECK-NEXT: Align: 16 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_half8(<8 x half> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 - !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_float16 -; CHECK-NEXT: SymbolName: 'test_float16@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: float16 -; CHECK-NEXT: Size: 64 -; CHECK-NEXT: Align: 64 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_float16(<16 x float> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 - !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_double16 -; CHECK-NEXT: SymbolName: 'test_double16@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: double16 -; CHECK-NEXT: Size: 128 -; CHECK-NEXT: Align: 128 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_double16(<16 x double> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 - !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_pointer -; CHECK-NEXT: SymbolName: 'test_pointer@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_pointer(ptr addrspace(1) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 - !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_image -; CHECK-NEXT: SymbolName: 'test_image@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: image2d_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Image -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_image(ptr addrspace(1) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 - !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_sampler -; CHECK-NEXT: SymbolName: 'test_sampler@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: sampler_t -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: Sampler -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_sampler(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 - !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_queue -; CHECK-NEXT: SymbolName: 'test_queue@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: queue_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Queue -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_queue(ptr addrspace(1) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 - !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_struct -; CHECK-NEXT: SymbolName: 'test_struct@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: struct A -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_struct(%struct.A %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 - !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_struct_byref_constant -; CHECK-NEXT: SymbolName: 'test_struct_byref_constant@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: struct A -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_struct_byref_constant(ptr addrspace(4) byref(%struct.A) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 - !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_array -; CHECK-NEXT: SymbolName: 'test_array@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: struct A -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_array([8 x i8] %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 - !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_array_byref_constant -; CHECK-NEXT: SymbolName: 'test_array_byref_constant@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: struct A -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_array_byref_constant(ptr addrspace(4) byref([8 x i8]) %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 - !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_i128 -; CHECK-NEXT: SymbolName: 'test_i128@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: i128 -; CHECK-NEXT: Size: 16 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_i128(i128 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 - !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_multi_arg -; CHECK-NEXT: SymbolName: 'test_multi_arg@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: b -; CHECK-NEXT: TypeName: short2 -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: c -; CHECK-NEXT: TypeName: char3 -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0 - !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 - !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK: - Name: test_addr_space -; CHECK-NEXT: SymbolName: 'test_addr_space@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: g -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: c -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Constant -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: l -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_addr_space(ptr addrspace(1) %g, - ptr addrspace(4) %c, - ptr addrspace(3) align 4 %l) #0 - !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 - !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK: - Name: test_type_qual -; CHECK-NEXT: SymbolName: 'test_type_qual@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: IsVolatile: true -; CHECK-NEXT: - Name: b -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: IsConst: true -; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: - Name: c -; CHECK-NEXT: TypeName: 'int addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Pipe -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: IsPipe: true -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_type_qual(ptr addrspace(1) %a, - ptr addrspace(1) %b, - ptr addrspace(1) %c) #0 - !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 - !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { - ret void -} - -; CHECK: - Name: test_access_qual -; CHECK-NEXT: SymbolName: 'test_access_qual@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: ro -; CHECK-NEXT: TypeName: image1d_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Image -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: ReadOnly -; CHECK-NEXT: - Name: wo -; CHECK-NEXT: TypeName: image2d_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Image -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: WriteOnly -; CHECK-NEXT: - Name: rw -; CHECK-NEXT: TypeName: image3d_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: Image -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: ReadWrite -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_access_qual(ptr addrspace(1) %ro, - ptr addrspace(1) %wo, - ptr addrspace(1) %rw) #0 - !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 - !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_half -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_half@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: half -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_float -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_float@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: float -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_double -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_double@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: double -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_char -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_char@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: char -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_short -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_short@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: short -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_long -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_long@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: long -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { - ret void -} - -; CHECK: - Name: test_vec_type_hint_unknown -; CHECK-NEXT: SymbolName: 'test_vec_type_hint_unknown@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: VecTypeHint: unknown -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { - ret void -} - -; CHECK: - Name: test_reqd_wgs_vec_type_hint -; CHECK-NEXT: SymbolName: 'test_reqd_wgs_vec_type_hint@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: ReqdWorkGroupSize: [ 1, 2, 4 ] -; CHECK-NEXT: VecTypeHint: int -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 - !reqd_work_group_size !6 { - ret void -} - -; CHECK: - Name: test_wgs_hint_vec_type_hint -; CHECK-NEXT: SymbolName: 'test_wgs_hint_vec_type_hint@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: WorkGroupSizeHint: [ 8, 16, 32 ] -; CHECK-NEXT: VecTypeHint: uint4 -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 - !work_group_size_hint !8 { - ret void -} - -; CHECK: - Name: test_arg_ptr_to_ptr -; CHECK-NEXT: SymbolName: 'test_arg_ptr_to_ptr@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'int addrspace(5)* addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_ptr_to_ptr(ptr addrspace(1) %a) #0 - !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 - !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_arg_struct_contains_ptr -; CHECK-NEXT: SymbolName: 'test_arg_struct_contains_ptr@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: struct B -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 - !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_arg_vector_of_ptr -; CHECK-NEXT: SymbolName: 'test_arg_vector_of_ptr@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))' -; CHECK-NEXT: Size: 16 -; CHECK-NEXT: Align: 16 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x ptr addrspace(1)> %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 - !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_arg_unknown_builtin_type -; CHECK-NEXT: SymbolName: 'test_arg_unknown_builtin_type@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: clk_event_t -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_unknown_builtin_type( - ptr addrspace(1) %a) #0 - !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 - !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_pointee_align -; CHECK-NEXT: SymbolName: 'test_pointee_align@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'long addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: b -; CHECK-NEXT: TypeName: 'char addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 1 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: c -; CHECK-NEXT: TypeName: 'char2 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 2 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: d -; CHECK-NEXT: TypeName: 'char3 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: e -; CHECK-NEXT: TypeName: 'char4 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: f -; CHECK-NEXT: TypeName: 'char8 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 8 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: g -; CHECK-NEXT: TypeName: 'char16 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 16 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: h -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 1 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_pointee_align(ptr addrspace(1) %a, - ptr addrspace(3) %b, - ptr addrspace(3) align 2 %c, - ptr addrspace(3) align 4 %d, - ptr addrspace(3) align 4 %e, - ptr addrspace(3) align 8 %f, - ptr addrspace(3) align 16 %g, - ptr addrspace(3) %h) #0 - !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 - !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { - ret void -} - -; CHECK: - Name: test_pointee_align_attribute -; CHECK-NEXT: SymbolName: 'test_pointee_align_attribute@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: 'long addrspace(5)*' -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: b -; CHECK-NEXT: TypeName: 'char addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 8 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: c -; CHECK-NEXT: TypeName: 'char2 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 32 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: d -; CHECK-NEXT: TypeName: 'char3 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 64 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: e -; CHECK-NEXT: TypeName: 'char4 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 256 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: f -; CHECK-NEXT: TypeName: 'char8 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 128 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: g -; CHECK-NEXT: TypeName: 'char16 addrspace(5)*' -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 1024 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: h -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: PointeeAlign: 16 -; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_pointee_align_attribute(ptr addrspace(1) align 16 %a, - ptr addrspace(3) align 8 %b, - ptr addrspace(3) align 32 %c, - ptr addrspace(3) align 64 %d, - ptr addrspace(3) align 256 %e, - ptr addrspace(3) align 128 %f, - ptr addrspace(3) align 1024 %g, - ptr addrspace(3) align 16 %h) #0 - !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 - !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { - ret void -} - - -; CHECK: - Name: __test_block_invoke_kernel -; CHECK-NEXT: SymbolName: '__test_block_invoke_kernel@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Attrs: -; CHECK-NEXT: RuntimeHandle: __test_block_invoke_kernel_runtime_handle -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: arg -; CHECK-NEXT: TypeName: __block_literal -; CHECK-NEXT: Size: 25 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @__test_block_invoke_kernel( - <{ i32, i32, ptr, ptr addrspace(1), i8 }> %arg) #1 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 - !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: test_enqueue_kernel_caller -; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenPrintfBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 - !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: - Name: unknown_addrspace_kernarg -; CHECK: Args: -; CHECK-NEXT: - Name: ptr -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) #0 { - ret void -} - -attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" } -attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} - -!llvm.printf.fmts = !{!100, !101} - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"int"} -!4 = !{!""} -!5 = !{i32 undef, i32 1} -!6 = !{i32 1, i32 2, i32 4} -!7 = !{<4 x i32> undef, i32 0} -!8 = !{i32 8, i32 16, i32 32} -!9 = !{!"char"} -!10 = !{!"ushort2"} -!11 = !{!"int3"} -!12 = !{!"ulong4"} -!13 = !{!"half8"} -!14 = !{!"float16"} -!15 = !{!"double16"} -!16 = !{!"int addrspace(5)*"} -!17 = !{!"image2d_t"} -!18 = !{!"sampler_t"} -!19 = !{!"queue_t"} -!20 = !{!"struct A"} -!21 = !{!"i128"} -!22 = !{i32 0, i32 0, i32 0} -!23 = !{!"none", !"none", !"none"} -!24 = !{!"int", !"short2", !"char3"} -!25 = !{!"", !"", !""} -!26 = !{half undef, i32 1} -!27 = !{float undef, i32 1} -!28 = !{double undef, i32 1} -!29 = !{i8 undef, i32 1} -!30 = !{i16 undef, i32 1} -!31 = !{i64 undef, i32 1} -!32 = !{ptr addrspace(5) undef, i32 1} -!50 = !{i32 1, i32 2, i32 3} -!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"} -!60 = !{i32 1, i32 1, i32 1} -!61 = !{!"read_only", !"write_only", !"read_write"} -!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"} -!70 = !{!"volatile", !"const restrict", !"pipe"} -!80 = !{!"int addrspace(5)* addrspace(5)*"} -!81 = !{i32 1} -!82 = !{!"struct B"} -!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"} -!84 = !{!"clk_event_t"} -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} -!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} -!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"} -!94 = !{!"", !"", !"", !"", !"", !"", !""} -!100 = !{!"1:1:4:%d\5Cn"} -!101 = !{!"2:1:8:%g\5Cn"} -!110 = !{!"__block_literal"} -!111 = !{!"char", !"char"} -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ /dev/null @@ -1,313 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: Kernels: - -; CHECK: - Name: test0 -; CHECK: SymbolName: 'test0@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test0( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test8 -; CHECK: SymbolName: 'test8@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test8( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #0 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test16 -; CHECK: SymbolName: 'test16@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test16( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #1 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test24 -; CHECK: SymbolName: 'test24@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test24( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #2 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test32 -; CHECK: SymbolName: 'test32@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test32( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #3 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test48 -; CHECK: SymbolName: 'test48@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test48( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #4 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK: - Name: test56 -; CHECK: SymbolName: 'test56@kd' -; CHECK: Args: -; CHECK-NEXT: - Name: r -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: a -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Name: b -; CHECK-NEXT: Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: GlobalBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenDefaultQueue -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenCompletionAction -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: CodeProps: -define amdgpu_kernel void @test56( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #5 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to -; avoid optimizing out the implicit argument allocation. -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" } -attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" } -attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" } -attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" } -attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } -attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s - -%opencl.image1d_t = type opaque -%opencl.image1d_array_t = type opaque -%opencl.image1d_buffer_t = type opaque -%opencl.image2d_t = type opaque -%opencl.image2d_array_t = type opaque -%opencl.image2d_array_depth_t = type opaque -%opencl.image2d_array_msaa_t = type opaque -%opencl.image2d_array_msaa_depth_t = type opaque -%opencl.image2d_depth_t = type opaque -%opencl.image2d_msaa_t = type opaque -%opencl.image2d_msaa_depth_t = type opaque -%opencl.image3d_t = type opaque - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] - -; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: Args: -; CHECK: - Name: a -; CHECK: TypeName: image1d_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: b -; CHECK: TypeName: image1d_array_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: c -; CHECK: TypeName: image1d_buffer_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: d -; CHECK: TypeName: image2d_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: e -; CHECK: TypeName: image2d_array_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: f -; CHECK: TypeName: image2d_array_depth_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: g -; CHECK: TypeName: image2d_array_msaa_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: h -; CHECK: TypeName: image2d_array_msaa_depth_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: i -; CHECK: TypeName: image2d_depth_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: j -; CHECK: TypeName: image2d_msaa_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: k -; CHECK: TypeName: image2d_msaa_depth_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -; CHECK: - Name: l -; CHECK: TypeName: image3d_t -; CHECK: Size: 8 -; CHECK: ValueKind: Image -define amdgpu_kernel void @test(ptr addrspace(1) %a, - ptr addrspace(1) %b, - ptr addrspace(1) %c, - ptr addrspace(1) %d, - ptr addrspace(1) %e, - ptr addrspace(1) %f, - ptr addrspace(1) %g, - ptr addrspace(1) %h, - ptr addrspace(1) %i, - ptr addrspace(1) %j, - ptr addrspace(1) %k, - ptr addrspace(1) %l) - !kernel_arg_type !1 !kernel_arg_base_type !1 { - ret void -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} -!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", - !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", - !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", - !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t", - !"image3d_t"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s - -; Make sure llc does not crash for invalid opencl version metadata. - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: ... - -!opencl.ocl.version = !{} -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s - -; Make sure llc does not crash for invalid opencl version metadata. - -; CHECK: --- -; CHECK: amdhsa.version: -; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 -; CHECK: ... - -!opencl.ocl.version = !{!0} -!llvm.module.flags = !{!1} -!0 = !{} -!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s - -; Make sure llc does not crash for invalid opencl version metadata. - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: ... - -!opencl.ocl.version = !{!0} -!llvm.module.flags = !{!1} -!0 = !{} -!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s - -; Make sure llc does not crash for invalid opencl version metadata. - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: ... - -!opencl.ocl.version = !{!0} -!llvm.module.flags = !{!1} -!0 = !{i32 1} -!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ /dev/null @@ -1,173 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s - -@var = addrspace(1) global float 0.0 - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: Kernels: - -; CHECK-LABEL: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: CodeProps: -; CHECK: KernargSegmentSize: 24 -; CHECK: GroupSegmentFixedSize: 0 -; CHECK: PrivateSegmentFixedSize: 0 -; CHECK: KernargSegmentAlign: 8 -; CHECK: WavefrontSize: 64 -; CHECK: NumSGPRs: 6 -; CHECK: NumVGPRs: {{3|6}} -; CHECK: MaxFlatWorkGroupSize: 1024 -define amdgpu_kernel void @test( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK-LABEL: - Name: test_max_flat_workgroup_size -; CHECK: SymbolName: 'test_max_flat_workgroup_size@kd' -; CHECK: CodeProps: -; CHECK: KernargSegmentSize: 24 -; CHECK: GroupSegmentFixedSize: 0 -; CHECK: PrivateSegmentFixedSize: 0 -; CHECK: KernargSegmentAlign: 8 -; CHECK: WavefrontSize: 64 -; CHECK: NumSGPRs: 6 -; CHECK: NumVGPRs: {{3|6}} -; CHECK: MaxFlatWorkGroupSize: 256 -define amdgpu_kernel void @test_max_flat_workgroup_size( - ptr addrspace(1) %r, - ptr addrspace(1) %a, - ptr addrspace(1) %b) #2 { -entry: - %a.val = load half, ptr addrspace(1) %a - %b.val = load half, ptr addrspace(1) %b - %r.val = fadd half %a.val, %b.val - store half %r.val, ptr addrspace(1) %r - ret void -} - -; CHECK-LABEL: - Name: num_spilled_sgprs -; CHECK: SymbolName: 'num_spilled_sgprs@kd' -; CHECK: CodeProps: -; GFX700: NumSpilledSGPRs: 38 -; GFX803: NumSpilledSGPRs: 22 -; GFX900: NumSpilledSGPRs: {{22|48}} -define amdgpu_kernel void @num_spilled_sgprs( - ptr addrspace(1) %out0, ptr addrspace(1) %out1, [8 x i32], - ptr addrspace(1) %out2, ptr addrspace(1) %out3, [8 x i32], - ptr addrspace(1) %out4, ptr addrspace(1) %out5, [8 x i32], - ptr addrspace(1) %out6, ptr addrspace(1) %out7, [8 x i32], - ptr addrspace(1) %out8, ptr addrspace(1) %out9, [8 x i32], - ptr addrspace(1) %outa, ptr addrspace(1) %outb, [8 x i32], - ptr addrspace(1) %outc, ptr addrspace(1) %outd, [8 x i32], - ptr addrspace(1) %oute, ptr addrspace(1) %outf, [8 x i32], - i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32], - i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32], - i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32], - i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { -entry: - store i32 %in0, ptr addrspace(1) %out0 - store i32 %in1, ptr addrspace(1) %out1 - store i32 %in2, ptr addrspace(1) %out2 - store i32 %in3, ptr addrspace(1) %out3 - store i32 %in4, ptr addrspace(1) %out4 - store i32 %in5, ptr addrspace(1) %out5 - store i32 %in6, ptr addrspace(1) %out6 - store i32 %in7, ptr addrspace(1) %out7 - store i32 %in8, ptr addrspace(1) %out8 - store i32 %in9, ptr addrspace(1) %out9 - store i32 %ina, ptr addrspace(1) %outa - store i32 %inb, ptr addrspace(1) %outb - store i32 %inc, ptr addrspace(1) %outc - store i32 %ind, ptr addrspace(1) %outd - store i32 %ine, ptr addrspace(1) %oute - store i32 %inf, ptr addrspace(1) %outf - ret void -} - -; CHECK-LABEL: - Name: num_spilled_vgprs -; CHECK: SymbolName: 'num_spilled_vgprs@kd' -; CHECK: CodeProps: -; CHECK: NumSpilledVGPRs: {{13|14}} -define amdgpu_kernel void @num_spilled_vgprs() #1 { - %val0 = load volatile float, ptr addrspace(1) @var - %val1 = load volatile float, ptr addrspace(1) @var - %val2 = load volatile float, ptr addrspace(1) @var - %val3 = load volatile float, ptr addrspace(1) @var - %val4 = load volatile float, ptr addrspace(1) @var - %val5 = load volatile float, ptr addrspace(1) @var - %val6 = load volatile float, ptr addrspace(1) @var - %val7 = load volatile float, ptr addrspace(1) @var - %val8 = load volatile float, ptr addrspace(1) @var - %val9 = load volatile float, ptr addrspace(1) @var - %val10 = load volatile float, ptr addrspace(1) @var - %val11 = load volatile float, ptr addrspace(1) @var - %val12 = load volatile float, ptr addrspace(1) @var - %val13 = load volatile float, ptr addrspace(1) @var - %val14 = load volatile float, ptr addrspace(1) @var - %val15 = load volatile float, ptr addrspace(1) @var - %val16 = load volatile float, ptr addrspace(1) @var - %val17 = load volatile float, ptr addrspace(1) @var - %val18 = load volatile float, ptr addrspace(1) @var - %val19 = load volatile float, ptr addrspace(1) @var - %val20 = load volatile float, ptr addrspace(1) @var - %val21 = load volatile float, ptr addrspace(1) @var - %val22 = load volatile float, ptr addrspace(1) @var - %val23 = load volatile float, ptr addrspace(1) @var - %val24 = load volatile float, ptr addrspace(1) @var - %val25 = load volatile float, ptr addrspace(1) @var - %val26 = load volatile float, ptr addrspace(1) @var - %val27 = load volatile float, ptr addrspace(1) @var - %val28 = load volatile float, ptr addrspace(1) @var - %val29 = load volatile float, ptr addrspace(1) @var - %val30 = load volatile float, ptr addrspace(1) @var - - store volatile float %val0, ptr addrspace(1) @var - store volatile float %val1, ptr addrspace(1) @var - store volatile float %val2, ptr addrspace(1) @var - store volatile float %val3, ptr addrspace(1) @var - store volatile float %val4, ptr addrspace(1) @var - store volatile float %val5, ptr addrspace(1) @var - store volatile float %val6, ptr addrspace(1) @var - store volatile float %val7, ptr addrspace(1) @var - store volatile float %val8, ptr addrspace(1) @var - store volatile float %val9, ptr addrspace(1) @var - store volatile float %val10, ptr addrspace(1) @var - store volatile float %val11, ptr addrspace(1) @var - store volatile float %val12, ptr addrspace(1) @var - store volatile float %val13, ptr addrspace(1) @var - store volatile float %val14, ptr addrspace(1) @var - store volatile float %val15, ptr addrspace(1) @var - store volatile float %val16, ptr addrspace(1) @var - store volatile float %val17, ptr addrspace(1) @var - store volatile float %val18, ptr addrspace(1) @var - store volatile float %val19, ptr addrspace(1) @var - store volatile float %val20, ptr addrspace(1) @var - store volatile float %val21, ptr addrspace(1) @var - store volatile float %val22, ptr addrspace(1) @var - store volatile float %val23, ptr addrspace(1) @var - store volatile float %val24, ptr addrspace(1) @var - store volatile float %val25, ptr addrspace(1) @var - store volatile float %val26, ptr addrspace(1) @var - store volatile float %val27, ptr addrspace(1) @var - store volatile float %val28, ptr addrspace(1) @var - store volatile float %val29, ptr addrspace(1) @var - store volatile float %val30, ptr addrspace(1) @var - - ret void -} - -attributes #0 = { "amdgpu-num-sgpr"="14" } -attributes #1 = { "amdgpu-num-vgpr"="20" } -attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,62 +1,61 @@ ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 | FileCheck --check-prefixes=NONHSA-SI600 %s ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 | FileCheck --check-prefixes=NONHSA-SI601 %s ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 | FileCheck --check-prefixes=NONHSA-SI602 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefixes=HSA,HSA-CI702 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefixes=HSA,HSA-CI705 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefix=HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefix=HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefix=HSA-CI705 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefix=HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefix=HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefix=HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefix=HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefix=HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA-GFX903 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX904 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefix=HSA-GFX905 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX906 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefix=HSA-GFX907 %s -; HSA: .hsa_code_object_version 2,1 ; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600" ; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601" ; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602" -; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU" -; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" -; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" -; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU" -; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU" -; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" -; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" -; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" -; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU" -; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" -; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" -; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" -; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" -; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" -; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU" -; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU" -; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU" -; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU" +; HSA-CI700: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +; HSA-CI701: .amdgcn_target "amdgcn-unknown-amdhsa--gfx701" +; HSA-CI702: .amdgcn_target "amdgcn-unknown-amdhsa--gfx702" +; HSA-CI703: .amdgcn_target "amdgcn-unknown-amdhsa--gfx703" +; HSA-CI704: .amdgcn_target "amdgcn-unknown-amdhsa--gfx704" +; HSA-CI705: .amdgcn_target "amdgcn-unknown-amdhsa--gfx705" +; HSA-VI801: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" +; HSA-VI802: .amdgcn_target "amdgcn-unknown-amdhsa--gfx802" +; HSA-VI803: .amdgcn_target "amdgcn-unknown-amdhsa--gfx803" +; HSA-VI805: .amdgcn_target "amdgcn-unknown-amdhsa--gfx805" +; HSA-VI810: .amdgcn_target "amdgcn-unknown-amdhsa--gfx810" +; HSA-GFX900: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900:xnack-" +; HSA-GFX901: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900" +; HSA-GFX902: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902:xnack-" +; HSA-GFX903: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902" +; HSA-GFX904: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904:xnack-" +; HSA-GFX905: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904" +; HSA-GFX906: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906:xnack-" +; HSA-GFX907: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906" !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -1,14 +1,14 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. @@ -26,39 +26,66 @@ ; ELF: SHF_ALLOC (0x2) ; ELF: ] ; ELF: SectionData ( -; ELF: 0000: 04000000 08000000 01000000 414D4400 -; ELF: 0010: 02000000 01000000 04000000 1B000000 -; ELF: 0020: 03000000 414D4400 04000700 07000000 -; ELF: 0030: 00000000 00000000 414D4400 414D4447 -; ELF: 0040: 50550000 +; ELF: 0000: 07000000 A8020000 20000000 414D4447 +; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572 +; ELF: 0020: 6E656C73 928DA52E 61726773 9185AE2E +; ELF: 0030: 61646472 6573735F 73706163 65A6676C +; ELF: 0040: 6F62616C A52E6E61 6D65A36F 7574A72E +; ELF: 0050: 6F666673 657400A5 2E73697A 6508AB2E +; ELF: 0060: 76616C75 655F6B69 6E64AD67 6C6F6261 +; ELF: 0070: 6C5F6275 66666572 B92E6772 6F75705F +; ELF: 0080: 7365676D 656E745F 66697865 645F7369 +; ELF: 0090: 7A6500B6 2E6B6572 6E617267 5F736567 +; ELF: 00A0: 6D656E74 5F616C69 676E08B5 2E6B6572 +; ELF: 00B0: 6E617267 5F736567 6D656E74 5F73697A +; ELF: 00C0: 6508B82E 6D61785F 666C6174 5F776F72 +; ELF: 00D0: 6B67726F 75705F73 697A65CD 0400A52E +; ELF: 00E0: 6E616D65 A673696D 706C65BB 2E707269 +; ELF: 00F0: 76617465 5F736567 6D656E74 5F666978 +; ELF: 0100: 65645F73 697A6500 AB2E7367 70725F63 +; ELF: 0110: 6F756E74 06B12E73 6770725F 7370696C +; ELF: 0120: 6C5F636F 756E7400 A72E7379 6D626F6C +; ELF: 0130: A973696D 706C652E 6B64AB2E 76677072 +; ELF: 0140: 5F636F75 6E7403B1 2E766770 725F7370 +; ELF: 0150: 696C6C5F 636F756E 7400AF2E 77617665 +; ELF: 0160: 66726F6E 745F7369 7A65408D A52E6172 +; ELF: 0170: 677390B9 2E67726F 75705F73 65676D65 +; ELF: 0180: 6E745F66 69786564 5F73697A 6500B62E +; ELF: 0190: 6B65726E 6172675F 7365676D 656E745F +; ELF: 01A0: 616C6967 6E04B52E 6B65726E 6172675F +; ELF: 01B0: 7365676D 656E745F 73697A65 00B82E6D +; ELF: 01C0: 61785F66 6C61745F 776F726B 67726F75 +; ELF: 01D0: 705F7369 7A65CD04 00A52E6E 616D65B2 +; ELF: 01E0: 73696D70 6C655F6E 6F5F6B65 726E6172 +; ELF: 01F0: 6773BB2E 70726976 6174655F 7365676D +; ELF: 0200: 656E745F 66697865 645F7369 7A6500AB +; ELF: 0210: 2E736770 725F636F 756E7400 B12E7367 +; ELF: 0220: 70725F73 70696C6C 5F636F75 6E7400A7 +; ELF: 0230: 2E73796D 626F6CB5 73696D70 6C655F6E +; ELF: 0240: 6F5F6B65 726E6172 67732E6B 64AB2E76 +; ELF: 0250: 6770725F 636F756E 7402B12E 76677072 +; ELF: 0260: 5F737069 6C6C5F63 6F756E74 00AF2E77 +; ELF: 0270: 61766566 726F6E74 5F73697A 6540AD61 +; ELF: 0280: 6D646873 612E7461 72676574 BD616D64 +; ELF: 0290: 67636E2D 756E6B6E 6F776E2D 616D6468 +; ELF: 02A0: 73612D2D 67667837 3030AE61 6D646873 +; ELF: 02B0: 612E7665 7273696F 6E920101 ; ELF: ) ; ELF: Symbol { ; ELF: Name: simple -; ELF: Size: 288 -; ELF: Type: AMDGPU_HSA_KERNEL (0xA) +; ELF: Size: 32 ; ELF: } ; HSA-NOT: .AMDGPU.config ; HSA: .text -; HSA: .hsa_code_object_version 2,1 -; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" +; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" -; HSA-LABEL: .amdgpu_hsa_kernel simple -; HSA: {{^}}simple: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 - -; PRE-GFX10: enable_wavefront_size32 = 0 -; GFX10-W32: .amdhsa_wavefront_size32 1 -; GFX10-W64: .amdhsa_wavefront_size32 0 +; HSA-LABEL: {{^}}simple: ; PRE-GFX10: wavefront_size = 6 -; HSA: call_convention = -1 -; HSA: .end_amd_kernel_code_t ; HSA: s_load_{{dwordx2|b64}} s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 ; Make sure we are setting the ATC bit: @@ -69,6 +96,13 @@ ; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off +; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1 + +; PRE-GFX10-NOT: .amdhsa_wavefront_size32 +; GFX10-W32: .amdhsa_wavefront_size32 1 +; GFX10-W64: .amdhsa_wavefront_size32 0 + ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple @@ -78,8 +112,8 @@ ret void } -; HSA-LABEL: .amdgpu_hsa_kernel simple_no_kernargs -; HSA: enable_sgpr_kernarg_segment_ptr = 0 +; HSA-LABEL: {{^}}simple_no_kernargs: +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0 define amdgpu_kernel void @simple_no_kernargs() { entry: store volatile i32 0, ptr addrspace(1) undef @@ -87,4 +121,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -7,75 +7,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { ; GCN-LABEL: test_indirect_call_sgpr_ptr: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 -; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: +; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 ; GCN-NEXT: s_add_i32 s12, s12, s17 @@ -100,75 +32,7 @@ ; GCN-NEXT: s_endpgm ; ; GISEL-LABEL: test_indirect_call_sgpr_ptr: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 = 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: +; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; GISEL-NEXT: s_add_i32 s12, s12, s17 @@ -198,75 +62,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { ; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 -; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: +; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 ; GCN-NEXT: s_add_i32 s12, s12, s17 @@ -292,75 +88,7 @@ ; GCN-NEXT: s_endpgm ; ; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 = 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: +; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 ; GISEL-NEXT: s_add_i32 s12, s12, s17 @@ -1899,4 +1627,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll --- a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll @@ -5,21 +5,22 @@ ; accounted for, as well as legalization of types changing offsets. ; FUNC-LABEL: {{^}}i1_arg: -; HSA-VI: kernarg_segment_byte_size = 12 -; HSA-VI: kernarg_segment_alignment = 4 ; GCN: s_load_dword s ; GCN: s_and_b32 + +; HSA-VI: .amdhsa_kernarg_size 12 define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { store i1 %x, ptr addrspace(1) %out, align 1 ret void } ; FUNC-LABEL: {{^}}v3i8_arg: -; HSA-VI: kernarg_segment_byte_size = 12 -; HSA-VI: kernarg_segment_alignment = 4 + ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 + +; HSA-VI: .amdhsa_kernarg_size 12 define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { entry: store <3 x i8> %in, ptr addrspace(1) %out, align 4 @@ -27,9 +28,9 @@ } ; FUNC-LABEL: {{^}}i65_arg: -; HSA-VI: kernarg_segment_byte_size = 24 -; HSA-VI: kernarg_segment_alignment = 4 ; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 + +; HSA-VI: .amdhsa_kernarg_size 24 define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) nounwind { entry: store i65 %in, ptr addrspace(1) %out, align 4 @@ -37,7 +38,7 @@ } ; FUNC-LABEL: {{^}}empty_struct_arg: -; HSA-VI: kernarg_segment_byte_size = 0 +; HSA-VI: .amdhsa_kernarg_size 0 define amdgpu_kernel void @empty_struct_arg({} %in) nounwind { ret void } @@ -53,11 +54,12 @@ ; FIXME: Total argument size is computed wrong ; FUNC-LABEL: {{^}}struct_argument_alignment: -; HSA-VI: kernarg_segment_byte_size = 40 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20 + +; HSA-VI: .amdhsa_kernarg_size 40 define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) { %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 @@ -73,12 +75,13 @@ ; No padding between i8 and next struct, but round up at end to 4 byte ; multiple. ; FUNC-LABEL: {{^}}packed_struct_argument_alignment: -; HSA-VI: kernarg_segment_byte_size = 28 ; HSA-VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; HSA-VI: global_load_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:13 ; HSA-VI: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:17 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4 + +; HSA-VI: .amdhsa_kernarg_size 28 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 @@ -92,12 +95,13 @@ } ; GCN-LABEL: {{^}}struct_argument_alignment_after: -; HSA-VI: kernarg_segment_byte_size = 64 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20 ; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30 + +; HSA-VI: .amdhsa_kernarg_size 64 define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) { %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 @@ -151,9 +155,9 @@ ; Byref pointers should only be treated as offsets from kernarg ; GCN-LABEL: {{^}}byref_constant_i8_arg: -; GCN: kernarg_segment_byte_size = 12 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN: global_load_ubyte v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8 +; GCN: .amdhsa_kernarg_size 12 define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) { %in = load i8, ptr addrspace(4) %in.byref %ext = zext i8 %in to i32 @@ -162,9 +166,9 @@ } ; GCN-LABEL: {{^}}byref_constant_i16_arg: -; GCN: kernarg_segment_byte_size = 12 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN: global_load_ushort v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8 +; GCN: .amdhsa_kernarg_size 12 define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) { %in = load i16, ptr addrspace(4) %in.byref %ext = zext i16 %in to i32 @@ -173,8 +177,8 @@ } ; GCN-LABEL: {{^}}byref_constant_i32_arg: -; GCN: kernarg_segment_byte_size = 16 ; GCN: s_load_dwordx4 [[LOAD:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}} +; GCN: .amdhsa_kernarg_size 16 define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) { %in = load i32, ptr addrspace(4) %in.byref store volatile i32 %in, ptr addrspace(1) %out, align 4 @@ -183,9 +187,9 @@ } ; GCN-LABEL: {{^}}byref_constant_v4i32_arg: -; GCN: kernarg_segment_byte_size = 36 ; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10{{$}} ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x20{{$}} +; GCN: .amdhsa_kernarg_size 36 define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) { %in = load <4 x i32>, ptr addrspace(4) %in.byref store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 @@ -194,12 +198,12 @@ } ; GCN-LABEL: {{^}}byref_align_constant_i32_arg: -; GCN: kernarg_segment_byte_size = 264 ; GCN-DAG: s_load_dwordx2 s[[[IN:[0-9]+]]:[[AFTER_OFFSET:[0-9]+]]], s[4:5], 0x100{{$}} ; GCN-DAG: v_mov_b32_e32 [[V_IN:v[0-9]+]], s[[IN]] ; GCN-DAG: v_mov_b32_e32 [[V_AFTER_OFFSET:v[0-9]+]], s[[AFTER_OFFSET]] ; GCN: global_store_dword v{{[0-9]+}}, [[V_IN]], s ; GCN: global_store_dword v{{[0-9]+}}, [[V_AFTER_OFFSET]], s +; GCN: .amdhsa_kernarg_size 264 define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { %in = load i32, ptr addrspace(4) %in.byref store volatile i32 %in, ptr addrspace(1) %out, align 4 @@ -208,9 +212,9 @@ } ; GCN-LABEL: {{^}}byref_natural_align_constant_v16i32_arg: -; GCN: kernarg_segment_byte_size = 132 ; GCN-DAG: s_load_dword s{{[0-9]+}}, s[4:5], 0x80 ; GCN-DAG: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x40{{$}} +; GCN: .amdhsa_kernarg_size 132 define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { %in = load <16 x i32>, ptr addrspace(4) %in.byref store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 @@ -220,8 +224,8 @@ ; Also accept byref kernel arguments with other global address spaces. ; GCN-LABEL: {{^}}byref_global_i32_arg: -; GCN: kernarg_segment_byte_size = 12 ; GCN: s_load_dword [[IN:s[0-9]+]], s[4:5], 0x8{{$}} +; GCN: .amdhsa_kernarg_size 12 define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) { %in = load i32, ptr addrspace(1) %in.byref store i32 %in, ptr addrspace(1) %out, align 4 @@ -253,8 +257,8 @@ ; } ; GCN-LABEL: {{^}}multi_byref_constant_i32_arg: -; GCN: kernarg_segment_byte_size = 20 ; GCN: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 +; GCN: .amdhsa_kernarg_size 20 define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) { %in0 = load i32, ptr addrspace(4) %in0.byref %in1 = load i32, ptr addrspace(4) %in1.byref @@ -265,10 +269,10 @@ } ; GCN-LABEL: {{^}}byref_constant_i32_arg_offset0: -; GCN: kernarg_segment_byte_size = 4 ; GCN-NOT: s4 ; GCN-NOT: s5 ; GCN: s_load_dword {{s[0-9]+}}, s[4:5], 0x0{{$}} +; GCN: .amdhsa_kernarg_size 4 define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) { %in = load i32, ptr addrspace(4) %in.byref store i32 %in, ptr addrspace(1) undef, align 4 @@ -276,4 +280,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll @@ -14,7 +14,7 @@ ; HSA-LABEL: {{^}}test_no_round_size_1: -; HSA: workgroup_group_segment_byte_size = 38 +; HSA: .amdhsa_group_segment_fixed_size 38 define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false) @@ -31,8 +31,7 @@ ; add the alignment padding if necessary alignment padding if needed. ; HSA-LABEL: {{^}}test_round_size_2: -; HSA: workgroup_group_segment_byte_size = 86 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 86 define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false) @@ -45,8 +44,7 @@ ; 38 + (10 pad) + 38 (= 86) ; HSA-LABEL: {{^}}test_round_size_2_align_8: -; HSA: workgroup_group_segment_byte_size = 86 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 86 define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false) @@ -58,8 +56,7 @@ } ; HSA-LABEL: {{^}}test_round_local_lds_and_arg: -; HSA: workgroup_group_segment_byte_size = 38 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 38 define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false) @@ -70,8 +67,7 @@ } ; HSA-LABEL: {{^}}test_round_lds_arg: -; HSA: workgroup_group_segment_byte_size = 0 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 0 define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 %lds.arg, ptr addrspace(1) align 4 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 %lds.arg, i32 38, i1 false) @@ -80,8 +76,7 @@ ; FIXME: Parameter alignment not considered ; HSA-LABEL: {{^}}test_high_align_lds_arg: -; HSA: workgroup_group_segment_byte_size = 0 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 0 define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) align 64 %lds.arg) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 64 %lds.arg, ptr addrspace(1) align 64 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 64 %out, ptr addrspace(3) align 64 %lds.arg, i32 38, i1 false) @@ -90,8 +85,7 @@ ; (39 * 4) + (4 pad) + (7 * 8) = 216 ; HSA-LABEL: {{^}}test_missing_alignment_size_2_order0: -; HSA: workgroup_group_segment_byte_size = 216 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 216 define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.missing.align.0, ptr addrspace(1) align 4 %in, i32 160, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.missing.align.0, i32 160, i1 false) @@ -104,8 +98,7 @@ ; (39 * 4) + (4 pad) + (7 * 8) = 216 ; HSA-LABEL: {{^}}test_missing_alignment_size_2_order1: -; HSA: workgroup_group_segment_byte_size = 216 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 216 define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.missing.align.1, ptr addrspace(1) align 8 %in, i32 56, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.missing.align.1, i32 56, i1 false) @@ -119,8 +112,7 @@ ; align 32, 16, 16 ; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134) ; HSA-LABEL: {{^}}test_round_size_3_order0: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false) @@ -137,8 +129,7 @@ ; align 32, 16, 16 ; 38 (+ 10 pad) + 38 + (10 pad) + 38 ( = 134) ; HSA-LABEL: {{^}}test_round_size_3_order1: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false) @@ -155,8 +146,7 @@ ; align 32, 16, 16 ; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 126) ; HSA-LABEL: {{^}}test_round_size_3_order2: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false) @@ -173,8 +163,7 @@ ; align 32, 16, 16 ; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134) ; HSA-LABEL: {{^}}test_round_size_3_order3: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false) @@ -191,8 +180,7 @@ ; align 32, 16, 16 ; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134) ; HSA-LABEL: {{^}}test_round_size_3_order4: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false) @@ -209,8 +197,7 @@ ; align 32, 16, 16 ; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134) ; HSA-LABEL: {{^}}test_round_size_3_order5: -; HSA: workgroup_group_segment_byte_size = 134 -; HSA: group_segment_alignment = 4 +; HSA: .amdhsa_group_segment_fixed_size 134 define amdgpu_kernel void @test_round_size_3_order5(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false) call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false) @@ -229,4 +216,4 @@ attributes #2 = { convergent nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll --- a/llvm/test/CodeGen/AMDGPU/lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll @@ -12,8 +12,8 @@ ; EG-NEXT: .long 1 ; ALL: {{^}}test: -; HSA: granulated_lds_size = 0 -; HSA: workgroup_group_segment_byte_size = 4 +; HSA-NOT: COMPUTE_PGM_RSRC2.LDS_SIZE +; HSA: .amdhsa_group_segment_fixed_size 4 ; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only) @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4 @@ -36,4 +36,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -3,12 +3,11 @@ declare i64 @llvm.amdgcn.dispatch.id() #1 ; GCN-LABEL: {{^}}dispatch_id: -; GCN: .amd_kernel_code_t -; GCN: enable_sgpr_dispatch_id = 1 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] +; GCN: .amdhsa_user_sgpr_dispatch_id 1 define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 { %tmp0 = call i64 @llvm.amdgcn.dispatch.id() store i64 %tmp0, ptr addrspace(1) %out @@ -19,4 +18,4 @@ attributes #1 = { nounwind readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -4,8 +4,8 @@ ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target ; GCN-LABEL: {{^}}test: -; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +; GCN: .amdhsa_user_sgpr_dispatch_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) { %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 %value = load i32, ptr addrspace(4) %dispatch_ptr @@ -14,11 +14,11 @@ } ; GCN-LABEL: {{^}}test2 -; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s[[REG:[0-9]+]], s[4:5], 0x1 ; GCN: s_lshr_b32 s{{[0-9]+}}, s[[REG]], 16 ; GCN-NOT: load_ushort ; GCN: s_endpgm +; GCN: .amdhsa_user_sgpr_dispatch_ptr 1 define amdgpu_kernel void @test2(ptr addrspace(1) %out) { %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 %d1 = getelementptr inbounds i8, ptr addrspace(4) %dispatch_ptr, i64 6 @@ -33,4 +33,4 @@ attributes #0 = { readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -1,11 +1,8 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA %s -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 56 -; HSA: kernarg_segment_alignment = 4 ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 @@ -13,6 +10,7 @@ ; HSA: s_load_dword s0, s[4:5], 0x0 +; COV4: .amdhsa_kernarg_size 56 ; COV5: .amdhsa_kernarg_size 256 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -21,10 +19,6 @@ } ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit: -; HSA: enable_sgpr_kernarg_segment_ptr = 0 -; HSA: kernarg_segment_byte_size = 0 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 ; MESA: kernarg_segment_alignment = 4 @@ -34,6 +28,7 @@ ; MESA: s_load_dword s0, s[4:5], 0x0 +; COV4: .amdhsa_kernarg_size 0 ; COV5: .amdhsa_kernarg_size 0 define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -43,17 +38,13 @@ ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 48 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 ; MESA: kernarg_segment_alignment = 4 ; HSA: s_load_dword s0, s[4:5], 0x0 -; COV5: .amdhsa_kernarg_size 48 +; HSA: .amdhsa_kernarg_size 48 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -62,16 +53,13 @@ ; GCN-LABEL: {{^}}kernel_implicitarg_ptr: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 168 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 128 ; MESA: kernarg_segment_alignment = 4 ; HSA: s_load_dword s0, s[4:5], 0x1c +; COV4: .amdhsa_kernarg_size 168 ; COV5: .amdhsa_kernarg_size 368 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -81,17 +69,13 @@ ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 160 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 128 ; MESA: kernarg_segment_alignment = 4 ; HSA: s_load_dword s0, s[4:5], 0x1c -; COV5: .amdhsa_kernarg_size 160 +; HSA: .amdhsa_kernarg_size 160 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -121,9 +105,6 @@ } ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 56 -; HSA: kernarg_segment_alignment = 4 ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 @@ -132,6 +113,7 @@ ; GCN: s_mov_b64 s[8:9], s[4:5] ; GCN: s_swappc_b64 +; COV4: .amdhsa_kernarg_size 56 ; COV5: .amdhsa_kernarg_size 256 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 { call void @func_implicitarg_ptr() @@ -139,10 +121,6 @@ } ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0: -; HSA: enable_sgpr_kernarg_segment_ptr = 0 -; HSA: kernarg_segment_byte_size = 0 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 ; MESA: kernarg_segment_alignment = 4 @@ -151,16 +129,13 @@ ; MESA: s_mov_b64 s[8:9], s[4:5]{{$}} ; GCN: s_swappc_b64 -; COV5: .amdhsa_kernarg_size 0 +; HSA: .amdhsa_kernarg_size 0 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 { call void @func_implicitarg_ptr() ret void } ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 48 -; HSA: kernarg_segment_alignment = 4 ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 16 ; GCN: s_mov_b64 s[8:9], s[4:5] @@ -168,17 +143,13 @@ ; GCN-NOT: s5 ; GCN: s_swappc_b64 -; COV5: .amdhsa_kernarg_size 48 +; HSA: .amdhsa_kernarg_size 48 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 { call void @func_implicitarg_ptr() ret void } ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 168 -; HSA: kernarg_segment_alignment = 4 - ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 128 ; MESA: kernarg_segment_alignment = 4 @@ -189,6 +160,7 @@ ; GCN: s_addc_u32 s9, s5, 0{{$}} ; GCN: s_swappc_b64 +; COV4: .amdhsa_kernarg_size 168 ; COV5: .amdhsa_kernarg_size 368 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 { call void @func_implicitarg_ptr() @@ -196,9 +168,6 @@ } ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 160 -; HSA: kernarg_segment_alignment = 4 ; MESA: enable_sgpr_kernarg_segment_ptr = 1 ; MESA: kernarg_segment_byte_size = 128 ; MESA: kernarg_segment_alignment = 4 @@ -207,7 +176,7 @@ ; GCN: s_addc_u32 s9, s5, 0{{$}} ; GCN: s_swappc_b64 -; COV5: .amdhsa_kernarg_size 160 +; HSA: .amdhsa_kernarg_size 160 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 { call void @func_implicitarg_ptr() ret void @@ -273,116 +242,69 @@ } ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding: -; HSA: kernarg_segment_byte_size = 120 -; HSA: kernarg_segment_alignment = 6 ; MESA: kernarg_segment_byte_size = 84 ; MESA: kernarg_segment_alignment = 6 -; COV5: .amdhsa_kernarg_size 120 +; HSA: .amdhsa_kernarg_size 120 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 { %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr ret void } -; HSA-LABEL: Kernels: -; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty -; HSA: CodeProps: -; HSA: KernargSegmentSize: 56 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty_0implicit -; HSA: KernargSegmentSize: 0 -; HSA: KernargSegmentAlign: 4 - -; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr_empty -; HSA: KernargSegmentSize: 48 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_implicitarg_ptr -; HSA: KernargSegmentSize: 168 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr -; HSA: KernargSegmentSize: 160 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty -; HSA: KernargSegmentSize: 56 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty_implicit0 -; HSA: KernargSegmentSize: 0 -; HSA: KernargSegmentAlign: 4 - -; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func_empty -; HSA: KernargSegmentSize: 48 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func -; HSA: KernargSegmentSize: 168 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func -; HSA: KernargSegmentSize: 160 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func -; HSA: KernargSegmentSize: 168 -; HSA: KernargSegmentAlign: 8 - -; HSA-LABEL: - Name: kernel_implicitarg_no_struct_align_padding -; HSA: KernargSegmentSize: 120 -; HSA: KernargSegmentAlign: 64 - -; COV5-LABEL: amdhsa.kernels: -; COV5: .kernarg_segment_align: 8 +; HSA-LABEL: amdhsa.kernels: +; HSA: .kernarg_segment_align: 8 ; COV5-NEXT: .kernarg_segment_size: 256 -; COV5-LABEL: .name: kernel_implicitarg_ptr_empty +; COV4-NEXT: .kernarg_segment_size: 56 +; HSA-LABEL: .name: kernel_implicitarg_ptr_empty -; COV5: .kernarg_segment_align: 4 -; COV5-NEXT: .kernarg_segment_size: 0 -; COV5-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit +; HSA: .kernarg_segment_align: 4 +; HSA-NEXT: .kernarg_segment_size: 0 +; HSA-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit -; COV5: .kernarg_segment_align: 8 -; COV5-NEXT: .kernarg_segment_size: 48 -; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr_empty +; HSA: .kernarg_segment_align: 8 +; HSA-NEXT: .kernarg_segment_size: 48 +; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr_empty -; COV5: .kernarg_segment_align: 8 +; HSA: .kernarg_segment_align: 8 ; COV5-NEXT: .kernarg_segment_size: 368 -; COV5-LABEL: .name: kernel_implicitarg_ptr +; COV4-NEXT: .kernarg_segment_size: 168 +; HSA-LABEL: .name: kernel_implicitarg_ptr -; COV5: .kernarg_segment_align: 8 -; COV5-NEXT: .kernarg_segment_size: 160 -; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr +; HSA: .kernarg_segment_align: 8 +; HSA-NEXT: .kernarg_segment_size: 160 +; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr -; COV5: .kernarg_segment_align: 8 +; HSA: .kernarg_segment_align: 8 ; COV5-NEXT: .kernarg_segment_size: 256 -; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty +; COV4-NEXT: .kernarg_segment_size: 56 +; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty -; COV5: .kernarg_segment_align: 4 -; COV5-NEXT: .kernarg_segment_size: 0 -; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0 +; HSA: .kernarg_segment_align: 4 +; HSA-NEXT: .kernarg_segment_size: 0 +; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0 -; COV5: .kernarg_segment_align: 8 -; COV5-NEXT: .kernarg_segment_size: 48 -; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty +; HSA: .kernarg_segment_align: 8 +; HSA-NEXT: .kernarg_segment_size: 48 +; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty -; COV5: .kernarg_segment_align: 8 +; HSA: .kernarg_segment_align: 8 ; COV5-NEXT: .kernarg_segment_size: 368 -; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func +; COV4-NEXT: .kernarg_segment_size: 168 +; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func -; COV5: .kernarg_segment_align: 8 -; COV5-NEXT: .kernarg_segment_size: 160 -; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func +; HSA: .kernarg_segment_align: 8 +; HSA-NEXT: .kernarg_segment_size: 160 +; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func -; COV5: .kernarg_segment_align: 8 +; HSA: .kernarg_segment_align: 8 ; COV5-NEXT: .kernarg_segment_size: 368 -; COV5-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func +; COV4-NEXT: .kernarg_segment_size: 168 +; HSA-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func -; COV5: .kernarg_segment_align: 64 -; COV5-NEXT: .kernarg_segment_size: 120 -; COV5-LABEL: .name: kernel_implicitarg_no_struct_align_padding +; HSA: .kernarg_segment_align: 64 +; HSA-NEXT: .kernarg_segment_size: 120 +; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2 declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,15 +1,15 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s -; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s +; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s ; ALL-LABEL: {{^}}test: -; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: kernarg_segment_byte_size = 8 -; HSA: kernarg_segment_alignment = 4 - -; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa +; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V4: s_load_dword s{{[0-9]+}}, s[4:5], 0xa ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa + +; HSA: .amdhsa_kernarg_size 8 +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 { %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 @@ -19,12 +19,12 @@ } ; ALL-LABEL: {{^}}test_implicit: -; HSA: kernarg_segment_byte_size = 64 ; OS-MESA3D: kernarg_segment_byte_size = 24 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_alignment = 4 ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 +; HSA: .amdhsa_kernarg_size 8 define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10 @@ -34,10 +34,8 @@ } ; ALL-LABEL: {{^}}test_implicit_alignment: -; HSA: kernarg_segment_byte_size = 72 ; OS-MESA3D: kernarg_segment_byte_size = 28 -; CO-V2: kernarg_segment_alignment = 4 - +; OS-MESA3D: kernarg_segment_alignment = 4 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 @@ -45,6 +43,8 @@ ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] + +; HSA: .amdhsa_kernarg_size 12 define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load i32, ptr addrspace(4) %implicitarg.ptr @@ -53,10 +53,8 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment -; HSA: kernarg_segment_byte_size = 64 ; OS-MESA3D: kernarg_segment_byte_size = 28 -; CO-V2: kernarg_segment_alignment = 4 - +; OS-MESA3D: kernarg_segment_alignment = 4 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 @@ -64,6 +62,7 @@ ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] +; HSA: .amdhsa_kernarg_size 64 define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load i32, ptr addrspace(4) %implicitarg.ptr @@ -72,13 +71,13 @@ } ; ALL-LABEL: {{^}}test_no_kernargs: -; CO-V2: enable_sgpr_kernarg_segment_ptr = 0 -; CO-V2: kernarg_segment_byte_size = 0 +; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0 +; OS-MESA3D: kernarg_segment_byte_size = 0 +; OS-MESA3D: kernarg_segment_alignment = 4 -; CO-V2: kernarg_segment_alignment = 4 -; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}} -; HSA: s_load_dword s{{[0-9]+}}, [[NULL]], 0xa{{$}} +; HSA: .amdhsa_kernarg_size 0 +; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0 define amdgpu_kernel void @test_no_kernargs() #1 { %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 @@ -88,9 +87,9 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs: -; HSA: kernarg_segment_byte_size = 48 -; OS-MESA3d: kernarg_segment_byte_size = 16 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_byte_size = 16 +; OS-MESA3D: kernarg_segment_alignment = 4 +; HSA: .amdhsa_kernarg_size 48 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -99,9 +98,9 @@ } ; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up: -; HSA: kernarg_segment_byte_size = 40 ; OS-MESA3D: kernarg_segment_byte_size = 16 -; CO-V2: kernarg_segment_alignment = 4 +; OS-MESA3D: kernarg_segment_alignment = 4 +; HSA: .amdhsa_kernarg_size 40 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 { %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr @@ -109,13 +108,21 @@ ret void } +; ALL-LABEL: {{^}}func_kernarg_segment_ptr: +; ALL: v_mov_b32_e32 v0, 0{{$}} +; ALL: v_mov_b32_e32 v1, 0{{$}} +define ptr addrspace(4) @func_kernarg_segment_ptr() { + %ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + ret ptr addrspace(4) %ptr +} + declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } +attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" } attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll @@ -4,8 +4,8 @@ ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target ; GCN-LABEL: {{^}}test: -; GCN: enable_sgpr_queue_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +; GCN: .amdhsa_user_sgpr_queue_ptr 1 define amdgpu_kernel void @test(ptr addrspace(1) %out) { %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 %value = load i32, ptr addrspace(4) %queue_ptr @@ -18,4 +18,4 @@ attributes #0 = { nounwind readnone } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,7 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -11,25 +9,25 @@ ; ALL-LABEL: {{^}}test_workgroup_id_x: -; CO-V2: .amd_kernel_code_t -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 0 -; CO-V2: enable_sgpr_workgroup_id_z = 0 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_vgpr_workitem_id = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; CO-V2: .end_amd_kernel_code_t +; MESA3D: .amd_kernel_code_t +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 0 +; MESA3D: enable_sgpr_workgroup_id_z = 0 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_vgpr_workitem_id = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: .end_amd_kernel_code_t ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} -; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} +; MESA3D: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -41,22 +39,22 @@ } ; ALL-LABEL: {{^}}test_workgroup_id_y: -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 1 -; CO-V2: enable_sgpr_workgroup_id_z = 0 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 1 +; MESA3D: enable_sgpr_workgroup_id_z = 0 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 @@ -68,30 +66,30 @@ } ; ALL-LABEL: {{^}}test_workgroup_id_z: -; CO-V2: user_sgpr_count = 6 -; CO-V2: enable_sgpr_workgroup_id_x = 1 -; CO-V2: enable_sgpr_workgroup_id_y = 0 -; CO-V2: enable_sgpr_workgroup_id_z = 1 -; CO-V2: enable_sgpr_workgroup_info = 0 -; CO-V2: enable_vgpr_workitem_id = 0 -; CO-V2: enable_sgpr_private_segment_buffer = 1 -; CO-V2: enable_sgpr_dispatch_ptr = 0 -; CO-V2: enable_sgpr_queue_ptr = 0 -; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 -; CO-V2: enable_sgpr_dispatch_id = 0 -; CO-V2: enable_sgpr_flat_scratch_init = 0 -; CO-V2: enable_sgpr_private_segment_size = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 -; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; MESA3D: user_sgpr_count = 6 +; MESA3D: enable_sgpr_workgroup_id_x = 1 +; MESA3D: enable_sgpr_workgroup_id_y = 0 +; MESA3D: enable_sgpr_workgroup_id_z = 1 +; MESA3D: enable_sgpr_workgroup_info = 0 +; MESA3D: enable_vgpr_workitem_id = 0 +; MESA3D: enable_sgpr_private_segment_buffer = 1 +; MESA3D: enable_sgpr_dispatch_ptr = 0 +; MESA3D: enable_sgpr_queue_ptr = 0 +; MESA3D: enable_sgpr_kernarg_segment_ptr = 1 +; MESA3D: enable_sgpr_dispatch_id = 0 +; MESA3D: enable_sgpr_flat_scratch_init = 0 +; MESA3D: enable_sgpr_private_segment_size = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_x = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_y = 0 +; MESA3D: enable_sgpr_grid_workgroup_count_z = 0 ; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 -; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 @@ -106,4 +104,4 @@ attributes #1 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,9 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -16,7 +14,7 @@ ; MESA-NEXT: .long 132{{$}} ; ALL-LABEL: {{^}}test_workitem_id_x: -; CO-V2: enable_vgpr_workitem_id = 0 +; MESA3D: enable_vgpr_workitem_id = 0 ; ALL-NOT: v0 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 @@ -33,9 +31,9 @@ ; MESA-NEXT: .long 2180{{$}} ; ALL-LABEL: {{^}}test_workitem_id_y: -; CO-V2: enable_vgpr_workitem_id = 1 -; CO-V2-NOT: v1 -; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1 +; MESA3D: enable_vgpr_workitem_id = 1 +; MESA3D-NOT: v1 +; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] @@ -51,9 +49,9 @@ ; MESA-NEXT: .long 4228{{$}} ; ALL-LABEL: {{^}}test_workitem_id_z: -; CO-V2: enable_vgpr_workitem_id = 2 -; CO-V2-NOT: v2 -; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2 +; MESA3D: enable_vgpr_workitem_id = 2 +; MESA3D-NOT: v2 +; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] @@ -66,7 +64,7 @@ ; FIXME: Packed tid should avoid the and ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: -; CO-V2: enable_vgpr_workitem_id = 0 +; MESA3D: enable_vgpr_workitem_id = 0 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 @@ -87,7 +85,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: -; CO-V2: enable_vgpr_workitem_id = 1 +; MESA3D: enable_vgpr_workitem_id = 1 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] @@ -109,7 +107,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; MESA3D: enable_vgpr_workitem_id = 2 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] @@ -137,4 +135,4 @@ !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} -!3 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!3 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -76,7 +76,7 @@ ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000 +; CHECK-NEXT: s_nop 0 // 0000000000FC: BF800000 ; CHECK-EMPTY: ; CHECK-NEXT: : @@ -87,4 +87,4 @@ } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ /dev/null @@ -1,252 +0,0 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s - - -; ALL-LABEL: {{^}}private_elt_size_v4i32: - -; HSA-ELT16: private_element_size = 3 -; HSA-ELT8: private_element_size = 2 -; HSA-ELT4: private_element_size = 1 - - -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 -; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} - -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24{{$}} -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 - -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen - - -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} - -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} -define amdgpu_kernel void @private_elt_size_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom - %index.load = load i32, ptr addrspace(1) %gep.index - %index = and i32 %index.load, 2 - %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5) - %gep1 = getelementptr inbounds [2 x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1 - store <4 x i32> zeroinitializer, ptr addrspace(5) %alloca - store <4 x i32> , ptr addrspace(5) %gep1 - %gep2 = getelementptr inbounds [2 x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index - %load = load <4 x i32>, ptr addrspace(5) %gep2 - store <4 x i32> %load, ptr addrspace(1) %out - ret void -} - -; ALL-LABEL: {{^}}private_elt_size_v8i32: -; HSA-ELT16: private_element_size = 3 -; HSA-ELT8: private_element_size = 2 -; HSA-ELT4: private_element_size = 1 - -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48 -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64 -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80 - -; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} - - -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:56 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:88 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:72 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64 - -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen - - -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:48{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:52{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:56{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:60{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:64{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:68{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:72{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:76{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:80{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:84{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:88{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:92{{$}} - -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:16{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:20{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:24{{$}} -; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:28{{$}} -define amdgpu_kernel void @private_elt_size_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom - %index.load = load i32, ptr addrspace(1) %gep.index - %index = and i32 %index.load, 2 - %alloca = alloca [2 x <8 x i32>], align 32, addrspace(5) - %gep1 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1 - store <8 x i32> zeroinitializer, ptr addrspace(5) %alloca - store <8 x i32> , ptr addrspace(5) %gep1 - %gep2 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index - %load = load <8 x i32>, ptr addrspace(5) %gep2 - store <8 x i32> %load, ptr addrspace(1) %out - ret void -} - - -; ALL-LABEL: {{^}}private_elt_size_i64: -; HSA-ELT16: private_element_size = 3 -; HSA-ELT8: private_element_size = 2 -; HSA-ELT4: private_element_size = 1 - -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:1 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:2 -; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen -; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] - - -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} - -; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @private_elt_size_i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom - %index.load = load i32, ptr addrspace(1) %gep.index - %index = and i32 %index.load, 2 - %alloca = alloca [2 x i64], align 16, addrspace(5) - %gep1 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 1 - store i64 0, ptr addrspace(5) %alloca - store i64 34359738602, ptr addrspace(5) %gep1 - %gep2 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 %index - %load = load i64, ptr addrspace(5) %gep2 - store i64 %load, ptr addrspace(1) %out - ret void -} - -; ALL-LABEL: {{^}}private_elt_size_f64: -; HSA-ELT16: private_element_size = 3 -; HSA-ELT8: private_element_size = 2 -; HSA-ELT4: private_element_size = 1 - -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24 -; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen -; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] - - -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} - -; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} -; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @private_elt_size_f64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom - %index.load = load i32, ptr addrspace(1) %gep.index - %index = and i32 %index.load, 2 - %alloca = alloca [2 x double], align 16, addrspace(5) - %gep1 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 1 - store double 0.0, ptr addrspace(5) %alloca - store double 4.0, ptr addrspace(5) %gep1 - %gep2 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 %index - %load = load double, ptr addrspace(5) %gep2 - store double %load, ptr addrspace(1) %out - ret void -} - -; ALL-LABEL: {{^}}private_elt_size_v2i64: -; HSA-ELT16: private_element_size = 3 -; HSA-ELT8: private_element_size = 2 -; HSA-ELT4: private_element_size = 1 - -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 -; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} - -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16{{$}} -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 - -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen -; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen - - -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} - -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -define amdgpu_kernel void @private_elt_size_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom - %index.load = load i32, ptr addrspace(1) %gep.index - %index = and i32 %index.load, 2 - %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5) - %gep1 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 1 - store <2 x i64> zeroinitializer, ptr addrspace(5) %alloca - store <2 x i64> , ptr addrspace(5) %gep1 - %gep2 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 %index - %load = load <2 x i64>, ptr addrspace(5) %gep2 - store <2 x i64> %load, ptr addrspace(1) %out - ret void -} - -declare i32 @llvm.amdgcn.workitem.id.x() #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll @@ -2,7 +2,7 @@ ; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s ; ALL-LABEL: {{^}}promote_alloca_i32_array_array: -; NOOPTS: workgroup_group_segment_byte_size = 0{{$}} +; NOOPTS: .amdhsa_group_segment_fixed_size 0 ; NOOPTS-NOT: ds_write ; OPTS: ds_write define amdgpu_kernel void @promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #0 { @@ -18,7 +18,7 @@ } ; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array: -; ALL: workgroup_group_segment_byte_size = 0{{$}} +; ALL: .amdhsa_group_segment_fixed_size 0 ; ALL-NOT: ds_write define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #1 { entry: @@ -36,4 +36,4 @@ attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -31,7 +31,7 @@ ; GCN-LABEL: {{^}}promote_alloca_size_order_0: -; GCN: workgroup_group_segment_byte_size = 1060 +; GCN: .amdhsa_group_segment_fixed_size 1060 define amdgpu_kernel void @promote_alloca_size_order_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -62,7 +62,7 @@ } ; GCN-LABEL: {{^}}promote_alloca_size_order_1: -; GCN: workgroup_group_segment_byte_size = 1072 +; GCN: .amdhsa_group_segment_fixed_size 1072 define amdgpu_kernel void @promote_alloca_size_order_1(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -99,7 +99,7 @@ ; size limit, so it isn't promoted ; GCN-LABEL: {{^}}promote_alloca_align_pad_guess_over_limit: -; GCN: workgroup_group_segment_byte_size = 1060 +; GCN: .amdhsa_group_segment_fixed_size 1060 define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -129,4 +129,4 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll b/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s - -; GFX90C-VALID: .hsa_code_object_isa 9,0,12,"AMD","AMDGPU" -; GFX90C-VALID: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx90c" -; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY - -; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940 - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s @@ -16,84 +12,6 @@ declare void @llvm.debugtrap() #1 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V2-LABEL: trap: -; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0: -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm -; ; NOHSA-TRAP-GFX900-V3-LABEL: trap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 @@ -114,86 +32,6 @@ ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm ; -; HSA-TRAP-GFX803-V2-LABEL: trap: -; HSA-TRAP-GFX803-V2: .amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: priority = 0 -; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX803-V2-NEXT: priv = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8 -; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8 -; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: -; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s2 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s3 -; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: s_trap 2 -; ; HSA-TRAP-GFX803-V3-LABEL: trap: ; HSA-TRAP-GFX803-V3: ; %bb.0: ; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 @@ -218,85 +56,6 @@ ; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 ; -; HSA-TRAP-GFX900-V2-LABEL: trap: -; HSA-TRAP-GFX900-V2: .amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-TRAP-GFX900-V2-NEXT: priority = 0 -; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX900-V2-NEXT: priv = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8 -; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12 -; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: -; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[2:3] -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: s_trap 2 -; ; HSA-TRAP-GFX900-V3-LABEL: trap: ; HSA-TRAP-GFX900-V3: ; %bb.0: ; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 @@ -318,84 +77,6 @@ ; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 ; -; HSA-NOTRAP-GFX900-V2-LABEL: trap: -; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: -; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm -; ; HSA-NOTRAP-GFX900-V3-LABEL: trap: ; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -423,91 +104,6 @@ } define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap: -; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 -; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 -; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm -; NOHSA-TRAP-GFX900-V2-NEXT: .LBB1_2: ; %trap -; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm -; ; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry ; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 @@ -542,95 +138,6 @@ ; NOHSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap ; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm ; -; HSA-TRAP-GFX803-V2-LABEL: non_entry_trap: -; HSA-TRAP-GFX803-V2: .amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-TRAP-GFX803-V2-NEXT: priority = 0 -; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX803-V2-NEXT: priv = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8 -; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 10 -; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 3 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: ; %entry -; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V2-NEXT: flat_load_dword v0, v[0:1] glc -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V2-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX803-V2-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: s_endpgm -; HSA-TRAP-GFX803-V2-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX803-V2-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V2-NEXT: s_trap 2 -; ; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap: ; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry ; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -673,92 +180,6 @@ ; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 ; -; HSA-TRAP-GFX900-V2-LABEL: non_entry_trap: -; HSA-TRAP-GFX900-V2: .amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-TRAP-GFX900-V2-NEXT: priority = 0 -; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX900-V2-NEXT: priv = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8 -; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12 -; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry -; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 -; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: s_endpgm -; HSA-TRAP-GFX900-V2-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX900-V2-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V2-NEXT: s_trap 2 -; ; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap: ; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry ; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -794,91 +215,6 @@ ; HSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap ; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 ; -; HSA-NOTRAP-GFX900-V2-LABEL: non_entry_trap: -; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry -; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V2-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.1: ; %ret -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 3 -; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm -; HSA-NOTRAP-GFX900-V2-NEXT: .LBB1_2: ; %trap -; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm -; ; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap: ; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry ; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -927,87 +263,6 @@ } define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap: -; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 2 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44 -; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6 -; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0: -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] -; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm -; ; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1034,89 +289,6 @@ ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm ; -; HSA-TRAP-GFX803-V2-LABEL: debugtrap: -; HSA-TRAP-GFX803-V2: .amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_major = 8 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX803-V2-NEXT: amd_machine_version_stepping = 3 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX803-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_wavefront_sgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: priority = 0 -; HSA-TRAP-GFX803-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX803-V2-NEXT: priv = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 6 -; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0 -; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 6 -; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 4 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0: -; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v3, 2 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: s_trap 3 -; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v3 -; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V2-NEXT: s_endpgm -; ; HSA-TRAP-GFX803-V3-LABEL: debugtrap: ; HSA-TRAP-GFX803-V3: ; %bb.0: ; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1147,88 +319,6 @@ ; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX803-V4-NEXT: s_endpgm ; -; HSA-TRAP-GFX900-V2-LABEL: debugtrap: -; HSA-TRAP-GFX900-V2: .amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-TRAP-GFX900-V2-NEXT: priority = 0 -; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-TRAP-GFX900-V2-NEXT: priv = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 6 -; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10 -; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0: -; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: s_trap 3 -; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V2-NEXT: s_endpgm -; ; HSA-TRAP-GFX900-V3-LABEL: debugtrap: ; HSA-TRAP-GFX900-V3: ; %bb.0: ; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1257,87 +347,6 @@ ; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX900-V4-NEXT: s_endpgm ; -; HSA-NOTRAP-GFX900-V2-LABEL: debugtrap: -; HSA-NOTRAP-GFX900-V2: .amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_major = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_code_version_minor = 2 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_kind = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_major = 9 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256 -; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240 -; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 6 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8 -; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10 -; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4 -; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6 -; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1 -; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0 -; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t -; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0: -; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm -; ; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap: ; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -38,12 +38,11 @@ ; NOMESA-TRAP-NEXT: .long 144 ; GCN-LABEL: {{^}}hsa_trap: -; HSA-TRAP: enable_trap_handler = 0 ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP: s_trap 2 +; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 ; for llvm.trap in hsa path without ABI, direct generate s_endpgm instruction without any warning information -; NO-HSA-TRAP: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 @@ -67,12 +66,11 @@ ; NOMESA-TRAP-NEXT: .long 140 ; GCN-LABEL: {{^}}hsa_debugtrap: -; HSA-TRAP: enable_trap_handler = 0 ; HSA-TRAP: s_trap 3 ; HSA-TRAP: flat_store_dword v[0:1], v3 +; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0 ; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction -; NO-HSA-TRAP: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; TRAP-BIT: enable_trap_handler = 1 @@ -148,4 +146,4 @@ attributes #1 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/MC/AMDGPU/hsa-exp.s b/llvm/test/MC/AMDGPU/hsa-exp.s --- a/llvm/test/MC/AMDGPU/hsa-exp.s +++ b/llvm/test/MC/AMDGPU/hsa-exp.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF +// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF // ELF: Section { // ELF: Name: .text @@ -8,121 +8,67 @@ // ELF: SHF_ALLOC (0x2) // ELF: SHF_EXECINSTR (0x4) -// ELF: SHT_NOTE -// ELF: 0000: 04000000 08000000 01000000 414D4400 -// ELF: 0010: 02000000 00000000 04000000 1B000000 -// ELF: 0020: 03000000 414D4400 04000700 07000000 -// ELF: 0030: 00000000 00000000 414D4400 414D4447 -// ELF: 0040: 50550000 - // ELF: Symbol { -// ELF: Name: amd_kernel_code_t_minimal -// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Name: minimal // ELF: Section: .text // ELF: } .text // ASM: .text -.hsa_code_object_version 2,0 -// ASM: .hsa_code_object_version 2,0 - -.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" - -.amdgpu_hsa_kernel amd_kernel_code_t_minimal +.amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +// ASM: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" .set my_is_ptr64, 1 .if my_is_ptr64 == 0 -.set my_kernarg_segment_byte_size, 32 +.set my_next_free_vgpr, 4 .else -.set my_kernarg_segment_byte_size, 16 +.set my_next_free_vgpr, 8 .endif -.set my_sgpr, 8 - - -amd_kernel_code_t_minimal: -.amd_kernel_code_t - amd_code_version_major = .option.machine_version_major - enable_sgpr_kernarg_segment_ptr = 1 - is_ptr64 = my_is_ptr64 - granulated_workitem_vgpr_count = 1 - granulated_wavefront_sgpr_count = 1+(my_sgpr-1)/8 - user_sgpr_count = 2 - kernarg_segment_byte_size = my_kernarg_segment_byte_size - wavefront_sgpr_count = my_sgpr -// wavefront_sgpr_count = 7 -; wavefront_sgpr_count = 7 -// Make sure a blank line won't break anything: +.set my_sgpr, 6 -// Make sure a line with whitespace won't break anything: +minimal: +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 1+(my_next_free_vgpr-1) + // Make sure a blank line won't break anything: - workitem_vgpr_count = 16 -.end_amd_kernel_code_t + .amdhsa_next_free_sgpr my_sgpr/2+3 +.end_amdhsa_kernel -// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: -// ASM: .amd_kernel_code_t -// ASM: amd_code_version_major = 7 -// ASM: amd_code_version_minor = 2 -// ASM: amd_machine_kind = 1 -// ASM: amd_machine_version_major = 7 -// ASM: amd_machine_version_minor = 0 -// ASM: amd_machine_version_stepping = 0 -// ASM: kernel_code_entry_byte_offset = 256 -// ASM: kernel_code_prefetch_byte_size = 0 -// ASM: granulated_workitem_vgpr_count = 1 -// ASM: granulated_wavefront_sgpr_count = 1 -// ASM: priority = 0 -// ASM: float_mode = 0 -// ASM: priv = 0 -// ASM: enable_dx10_clamp = 0 -// ASM: debug_mode = 0 -// ASM: enable_ieee_mode = 0 -// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 -// ASM: user_sgpr_count = 2 -// ASM: enable_sgpr_workgroup_id_x = 0 -// ASM: enable_sgpr_workgroup_id_y = 0 -// ASM: enable_sgpr_workgroup_id_z = 0 -// ASM: enable_sgpr_workgroup_info = 0 -// ASM: enable_vgpr_workitem_id = 0 -// ASM: enable_exception_msb = 0 -// ASM: granulated_lds_size = 0 -// ASM: enable_exception = 0 -// ASM: enable_sgpr_private_segment_buffer = 0 -// ASM: enable_sgpr_dispatch_ptr = 0 -// ASM: enable_sgpr_queue_ptr = 0 -// ASM: enable_sgpr_kernarg_segment_ptr = 1 -// ASM: enable_sgpr_dispatch_id = 0 -// ASM: enable_sgpr_flat_scratch_init = 0 -// ASM: enable_sgpr_private_segment_size = 0 -// ASM: enable_sgpr_grid_workgroup_count_x = 0 -// ASM: enable_sgpr_grid_workgroup_count_y = 0 -// ASM: enable_sgpr_grid_workgroup_count_z = 0 -// ASM: enable_ordered_append_gds = 0 -// ASM: private_element_size = 0 -// ASM: is_ptr64 = 1 -// ASM: is_dynamic_callstack = 0 -// ASM: is_debug_enabled = 0 -// ASM: is_xnack_enabled = 0 -// ASM: workitem_private_segment_byte_size = 0 -// ASM: workgroup_group_segment_byte_size = 0 -// ASM: gds_segment_byte_size = 0 -// ASM: kernarg_segment_byte_size = 16 -// ASM: workgroup_fbarrier_count = 0 -// ASM: wavefront_sgpr_count = 8 -// ASM: workitem_vgpr_count = 16 -// ASM: reserved_vgpr_first = 0 -// ASM: reserved_vgpr_count = 0 -// ASM: reserved_sgpr_first = 0 -// ASM: reserved_sgpr_count = 0 -// ASM: debug_wavefront_private_segment_offset_sgpr = 0 -// ASM: debug_private_segment_buffer_sgpr = 0 -// ASM: kernarg_segment_alignment = 4 -// ASM: group_segment_alignment = 4 -// ASM: private_segment_alignment = 4 -// ASM: wavefront_size = 6 -// ASM: call_convention = -1 -// ASM: runtime_loader_kernel_symbol = 0 -// ASM: .end_amd_kernel_code_t +; ASM-LABEL: minimal: +; ASM: .amdhsa_kernel minimal +; ASM: .amdhsa_group_segment_fixed_size 0 +; ASM: .amdhsa_private_segment_fixed_size 0 +; ASM: .amdhsa_kernarg_size 0 +; ASM: .amdhsa_user_sgpr_count 0 +; ASM: .amdhsa_user_sgpr_private_segment_buffer 0 +; ASM: .amdhsa_user_sgpr_dispatch_ptr 0 +; ASM: .amdhsa_user_sgpr_queue_ptr 0 +; ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; ASM: .amdhsa_user_sgpr_dispatch_id 0 +; ASM: .amdhsa_user_sgpr_flat_scratch_init 0 +; ASM: .amdhsa_user_sgpr_private_segment_size 0 +; ASM: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; ASM: .amdhsa_system_sgpr_workgroup_id_x 1 +; ASM: .amdhsa_system_sgpr_workgroup_id_y 0 +; ASM: .amdhsa_system_sgpr_workgroup_id_z 0 +; ASM: .amdhsa_system_sgpr_workgroup_info 0 +; ASM: .amdhsa_system_vgpr_workitem_id 0 +; ASM: .amdhsa_next_free_vgpr 8 +; ASM: .amdhsa_next_free_sgpr 6 +; ASM: .amdhsa_float_round_mode_32 0 +; ASM: .amdhsa_float_round_mode_16_64 0 +; ASM: .amdhsa_float_denorm_mode_32 0 +; ASM: .amdhsa_float_denorm_mode_16_64 3 +; ASM: .amdhsa_dx10_clamp 1 +; ASM: .amdhsa_ieee_mode 1 +; ASM: .amdhsa_exception_fp_ieee_invalid_op 0 +; ASM: .amdhsa_exception_fp_denorm_src 0 +; ASM: .amdhsa_exception_fp_ieee_div_zero 0 +; ASM: .amdhsa_exception_fp_ieee_overflow 0 +; ASM: .amdhsa_exception_fp_ieee_underflow 0 +; ASM: .amdhsa_exception_fp_ieee_inexact 0 +; ASM: .amdhsa_exception_int_div_zero 0 +; ASM: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10.s b/llvm/test/MC/AMDGPU/hsa-gfx10.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx10.s +++ /dev/null @@ -1,284 +0,0 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF - -// ELF: Section { -// ELF: Name: .text -// ELF: Type: SHT_PROGBITS (0x1) -// ELF: Flags [ (0x6) -// ELF: SHF_ALLOC (0x2) -// ELF: SHF_EXECINSTR (0x4) - -// ELF: SHT_NOTE -// ELF: 0000: 04000000 08000000 01000000 414D4400 -// ELF: 0010: 02000000 00000000 04000000 1B000000 -// ELF: 0020: 03000000 414D4400 04000700 07000000 -// ELF: 0030: 00000000 00000000 414D4400 414D4447 -// ELF: 0040: 50550000 -// We can't check binary representation of metadata note: it is different on -// Windows and Linux because of carriage return on Windows - -// ELF: Symbol { -// ELF: Name: amd_kernel_code_t_test_all -// ELF: Type: AMDGPU_HSA_KERNEL (0xA) -// ELF: Section: .text -// ELF: } -// ELF: Symbol { -// ELF: Name: amd_kernel_code_t_minimal -// ELF: Type: AMDGPU_HSA_KERNEL (0xA) -// ELF: Section: .text -// ELF: } - -.text -// ASM: .text - -.hsa_code_object_version 2,0 -// ASM: .hsa_code_object_version 2,0 - -.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" - -.amd_amdgpu_hsa_metadata - Version: [ 3, 0 ] - Kernels: - - Name: amd_kernel_code_t_test_all - SymbolName: amd_kernel_code_t_test_all@kd - - Name: amd_kernel_code_t_minimal - SymbolName: amd_kernel_code_t_minimal@kd -.end_amd_amdgpu_hsa_metadata - -// ASM: .amd_amdgpu_hsa_metadata -// ASM: Version: [ 3, 0 ] -// ASM: Kernels: -// ASM: - Name: amd_kernel_code_t_test_all -// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd' -// ASM: - Name: amd_kernel_code_t_minimal -// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd' -// ASM: .end_amd_amdgpu_hsa_metadata - -.amdgpu_hsa_kernel amd_kernel_code_t_test_all -.amdgpu_hsa_kernel amd_kernel_code_t_minimal - -amd_kernel_code_t_test_all: -; Test all amd_kernel_code_t members with non-default values. -.amd_kernel_code_t - kernel_code_version_major = 100 - kernel_code_version_minor = 100 - machine_kind = 0 - machine_version_major = 5 - machine_version_minor = 5 - machine_version_stepping = 5 - kernel_code_entry_byte_offset = 512 - kernel_code_prefetch_byte_size = 1 - max_scratch_backing_memory_byte_size = 1 - compute_pgm_rsrc1_vgprs = 1 - compute_pgm_rsrc1_sgprs = 1 - compute_pgm_rsrc1_priority = 1 - compute_pgm_rsrc1_float_mode = 1 - compute_pgm_rsrc1_priv = 1 - compute_pgm_rsrc1_dx10_clamp = 1 - compute_pgm_rsrc1_debug_mode = 1 - compute_pgm_rsrc1_ieee_mode = 1 - compute_pgm_rsrc1_wgp_mode = 0 - compute_pgm_rsrc1_mem_ordered = 0 - compute_pgm_rsrc1_fwd_progress = 1 - compute_pgm_rsrc2_scratch_en = 1 - compute_pgm_rsrc2_user_sgpr = 1 - compute_pgm_rsrc2_tgid_x_en = 1 - compute_pgm_rsrc2_tgid_y_en = 1 - compute_pgm_rsrc2_tgid_z_en = 1 - compute_pgm_rsrc2_tg_size_en = 1 - compute_pgm_rsrc2_tidig_comp_cnt = 1 - compute_pgm_rsrc2_excp_en_msb = 1 - compute_pgm_rsrc2_lds_size = 1 - compute_pgm_rsrc2_excp_en = 1 - enable_sgpr_private_segment_buffer = 1 - enable_sgpr_dispatch_ptr = 1 - enable_sgpr_queue_ptr = 1 - enable_sgpr_kernarg_segment_ptr = 1 - enable_sgpr_dispatch_id = 1 - enable_sgpr_flat_scratch_init = 1 - enable_sgpr_private_segment_size = 1 - enable_sgpr_grid_workgroup_count_x = 1 - enable_sgpr_grid_workgroup_count_y = 1 - enable_sgpr_grid_workgroup_count_z = 1 - enable_ordered_append_gds = 1 - private_element_size = 1 - is_ptr64 = 1 - is_dynamic_callstack = 1 - is_debug_enabled = 1 - is_xnack_enabled = 1 - workitem_private_segment_byte_size = 1 - workgroup_group_segment_byte_size = 1 - gds_segment_byte_size = 1 - kernarg_segment_byte_size = 1 - workgroup_fbarrier_count = 1 - wavefront_sgpr_count = 1 - workitem_vgpr_count = 1 - reserved_vgpr_first = 1 - reserved_vgpr_count = 1 - reserved_sgpr_first = 1 - reserved_sgpr_count = 1 - debug_wavefront_private_segment_offset_sgpr = 1 - debug_private_segment_buffer_sgpr = 1 - kernarg_segment_alignment = 5 - group_segment_alignment = 5 - private_segment_alignment = 5 - wavefront_size = 6 - call_convention = 1 - runtime_loader_kernel_symbol = 1 -.end_amd_kernel_code_t - -// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: -// ASM: .amd_kernel_code_t -// ASM: amd_code_version_major = 100 -// ASM: amd_code_version_minor = 100 -// ASM: amd_machine_kind = 0 -// ASM: amd_machine_version_major = 5 -// ASM: amd_machine_version_minor = 5 -// ASM: amd_machine_version_stepping = 5 -// ASM: kernel_code_entry_byte_offset = 512 -// ASM: kernel_code_prefetch_byte_size = 1 -// ASM: granulated_workitem_vgpr_count = 1 -// ASM: granulated_wavefront_sgpr_count = 1 -// ASM: priority = 1 -// ASM: float_mode = 1 -// ASM: priv = 1 -// ASM: enable_dx10_clamp = 1 -// ASM: debug_mode = 1 -// ASM: enable_ieee_mode = 1 -// ASM: enable_wgp_mode = 0 -// ASM: enable_mem_ordered = 0 -// ASM: enable_fwd_progress = 1 -// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 -// ASM: user_sgpr_count = 1 -// ASM: enable_sgpr_workgroup_id_x = 1 -// ASM: enable_sgpr_workgroup_id_y = 1 -// ASM: enable_sgpr_workgroup_id_z = 1 -// ASM: enable_sgpr_workgroup_info = 1 -// ASM: enable_vgpr_workitem_id = 1 -// ASM: enable_exception_msb = 1 -// ASM: granulated_lds_size = 1 -// ASM: enable_exception = 1 -// ASM: enable_sgpr_private_segment_buffer = 1 -// ASM: enable_sgpr_dispatch_ptr = 1 -// ASM: enable_sgpr_queue_ptr = 1 -// ASM: enable_sgpr_kernarg_segment_ptr = 1 -// ASM: enable_sgpr_dispatch_id = 1 -// ASM: enable_sgpr_flat_scratch_init = 1 -// ASM: enable_sgpr_private_segment_size = 1 -// ASM: enable_sgpr_grid_workgroup_count_x = 1 -// ASM: enable_sgpr_grid_workgroup_count_y = 1 -// ASM: enable_sgpr_grid_workgroup_count_z = 1 -// ASM: enable_ordered_append_gds = 1 -// ASM: private_element_size = 1 -// ASM: is_ptr64 = 1 -// ASM: is_dynamic_callstack = 1 -// ASM: is_debug_enabled = 1 -// ASM: is_xnack_enabled = 1 -// ASM: workitem_private_segment_byte_size = 1 -// ASM: workgroup_group_segment_byte_size = 1 -// ASM: gds_segment_byte_size = 1 -// ASM: kernarg_segment_byte_size = 1 -// ASM: workgroup_fbarrier_count = 1 -// ASM: wavefront_sgpr_count = 1 -// ASM: workitem_vgpr_count = 1 -// ASM: reserved_vgpr_first = 1 -// ASM: reserved_vgpr_count = 1 -// ASM: reserved_sgpr_first = 1 -// ASM: reserved_sgpr_count = 1 -// ASM: debug_wavefront_private_segment_offset_sgpr = 1 -// ASM: debug_private_segment_buffer_sgpr = 1 -// ASM: kernarg_segment_alignment = 5 -// ASM: group_segment_alignment = 5 -// ASM: private_segment_alignment = 5 -// ASM: wavefront_size = 6 -// ASM: call_convention = 1 -// ASM: runtime_loader_kernel_symbol = 1 -// ASM: .end_amd_kernel_code_t - -amd_kernel_code_t_minimal: -.amd_kernel_code_t - enable_sgpr_kernarg_segment_ptr = 1 - is_ptr64 = 1 - granulated_workitem_vgpr_count = 1 - granulated_wavefront_sgpr_count = 1 - user_sgpr_count = 2 - kernarg_segment_byte_size = 16 - wavefront_sgpr_count = 8 -// wavefront_sgpr_count = 7 -; wavefront_sgpr_count = 7 -// Make sure a blank line won't break anything: - -// Make sure a line with whitespace won't break anything: - - workitem_vgpr_count = 16 -.end_amd_kernel_code_t - -// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: -// ASM: .amd_kernel_code_t -// ASM: amd_code_version_major = 1 -// ASM: amd_code_version_minor = 2 -// ASM: amd_machine_kind = 1 -// ASM: amd_machine_version_major = 10 -// ASM: amd_machine_version_minor = 1 -// ASM: amd_machine_version_stepping = 0 -// ASM: kernel_code_entry_byte_offset = 256 -// ASM: kernel_code_prefetch_byte_size = 0 -// ASM: granulated_workitem_vgpr_count = 1 -// ASM: granulated_wavefront_sgpr_count = 1 -// ASM: priority = 0 -// ASM: float_mode = 0 -// ASM: priv = 0 -// ASM: enable_dx10_clamp = 0 -// ASM: debug_mode = 0 -// ASM: enable_ieee_mode = 0 -// ASM: enable_wgp_mode = 1 -// ASM: enable_mem_ordered = 1 -// ASM: enable_fwd_progress = 0 -// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 -// ASM: user_sgpr_count = 2 -// ASM: enable_sgpr_workgroup_id_x = 0 -// ASM: enable_sgpr_workgroup_id_y = 0 -// ASM: enable_sgpr_workgroup_id_z = 0 -// ASM: enable_sgpr_workgroup_info = 0 -// ASM: enable_vgpr_workitem_id = 0 -// ASM: enable_exception_msb = 0 -// ASM: granulated_lds_size = 0 -// ASM: enable_exception = 0 -// ASM: enable_sgpr_private_segment_buffer = 0 -// ASM: enable_sgpr_dispatch_ptr = 0 -// ASM: enable_sgpr_queue_ptr = 0 -// ASM: enable_sgpr_kernarg_segment_ptr = 1 -// ASM: enable_sgpr_dispatch_id = 0 -// ASM: enable_sgpr_flat_scratch_init = 0 -// ASM: enable_sgpr_private_segment_size = 0 -// ASM: enable_sgpr_grid_workgroup_count_x = 0 -// ASM: enable_sgpr_grid_workgroup_count_y = 0 -// ASM: enable_sgpr_grid_workgroup_count_z = 0 -// ASM: enable_wavefront_size32 = 0 -// ASM: enable_ordered_append_gds = 0 -// ASM: private_element_size = 0 -// ASM: is_ptr64 = 1 -// ASM: is_dynamic_callstack = 0 -// ASM: is_debug_enabled = 0 -// ASM: is_xnack_enabled = 0 -// ASM: workitem_private_segment_byte_size = 0 -// ASM: workgroup_group_segment_byte_size = 0 -// ASM: gds_segment_byte_size = 0 -// ASM: kernarg_segment_byte_size = 16 -// ASM: workgroup_fbarrier_count = 0 -// ASM: wavefront_sgpr_count = 8 -// ASM: workitem_vgpr_count = 16 -// ASM: reserved_vgpr_first = 0 -// ASM: reserved_vgpr_count = 0 -// ASM: reserved_sgpr_first = 0 -// ASM: reserved_sgpr_count = 0 -// ASM: debug_wavefront_private_segment_offset_sgpr = 0 -// ASM: debug_private_segment_buffer_sgpr = 0 -// ASM: kernarg_segment_alignment = 4 -// ASM: group_segment_alignment = 4 -// ASM: private_segment_alignment = 4 -// ASM: wavefront_size = 6 -// ASM: call_convention = -1 -// ASM: runtime_loader_kernel_symbol = 0 -// ASM: .end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args.s +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s - -// The legacy ValueType field should be parsed without error, but not -// re-emitted. - -// CHECK: .amd_amdgpu_hsa_metadata -// CHECK: Version: [ 1, 0 ] -// CHECK: Printf: -// CHECK: - '1:1:4:%d\n' -// CHECK: - '2:1:8:%g\n' -// CHECK: Kernels: -// CHECK: - Name: test_kernel -// CHECK: SymbolName: 'test_kernel@kd' -// CHECK: Language: OpenCL C -// CHECK: LanguageVersion: [ 2, 0 ] -// CHECK: Args: -// CHECK: - TypeName: char -// CHECK: Size: 1 -// CHECK: Align: 1 -// CHECK: ValueKind: ByValue -// CHECK: AccQual: Default -// CHECK: - Size: 8 -// CHECK: Align: 8 -// CHECK: ValueKind: HiddenGlobalOffsetX -// CHECK: - Size: 8 -// CHECK: Align: 8 -// CHECK: ValueKind: HiddenGlobalOffsetY -// CHECK: - Size: 8 -// CHECK: Align: 8 -// CHECK: ValueKind: HiddenGlobalOffsetZ -// CHECK: - Size: 8 -// CHECK: Align: 8 -// CHECK: ValueKind: HiddenPrintfBuffer -// CHECK: AddrSpaceQual: Global -// CHECK: .end_amd_amdgpu_hsa_metadata -.amd_amdgpu_hsa_metadata - Version: [ 1, 0 ] - Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] - Kernels: - - Name: test_kernel - SymbolName: test_kernel@kd - Language: OpenCL C - LanguageVersion: [ 2, 0 ] - Args: - - TypeName: char - Size: 1 - Align: 1 - ValueKind: ByValue - ValueType: I8 - AccQual: Default - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetX - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetY - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetZ - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenPrintfBuffer - ValueType: I8 - AddrSpaceQual: Global -.end_amd_amdgpu_hsa_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs.s +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s - -// CHECK: .amd_amdgpu_hsa_metadata -// CHECK: Version: [ 1, 0 ] -// CHECK: Printf: -// CHECK: - '1:1:4:%d\n' -// CHECK: - '2:1:8:%g\n' -// CHECK: Kernels: -// CHECK: - Name: test_kernel -// CHECK: SymbolName: 'test_kernel@kd' -// CHECK: Language: OpenCL C -// CHECK: LanguageVersion: [ 2, 0 ] -// CHECK: Attrs: -// CHECK: ReqdWorkGroupSize: [ 1, 2, 4 ] -// CHECK: WorkGroupSizeHint: [ 8, 16, 32 ] -// CHECK: VecTypeHint: int -// CHECK: .end_amd_amdgpu_hsa_metadata -.amd_amdgpu_hsa_metadata - Version: [ 1, 0 ] - Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] - Kernels: - - Name: test_kernel - SymbolName: test_kernel@kd - Language: OpenCL C - LanguageVersion: [ 2, 0 ] - Attrs: - ReqdWorkGroupSize: [ 1, 2, 4 ] - WorkGroupSizeHint: [ 8, 16, 32 ] - VecTypeHint: int -.end_amd_amdgpu_hsa_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s - -// CHECK: .amd_amdgpu_hsa_metadata -// CHECK: Version: [ 1, 0 ] -// CHECK: Kernels: -// CHECK: - Name: test_kernel -// CHECK: SymbolName: 'test_kernel@kd' -// CHECK: CodeProps: -// CHECK: KernargSegmentSize: 24 -// CHECK: GroupSegmentFixedSize: 24 -// CHECK: PrivateSegmentFixedSize: 16 -// CHECK: KernargSegmentAlign: 16 -// CHECK: WavefrontSize: 64 -// CHECK: MaxFlatWorkGroupSize: 256 -// CHECK: NumSpilledSGPRs: 1 -// CHECK: NumSpilledVGPRs: 1 -.amd_amdgpu_hsa_metadata - Version: [ 1, 0 ] - Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] - Kernels: - - Name: test_kernel - SymbolName: test_kernel@kd - CodeProps: - KernargSegmentSize: 24 - GroupSegmentFixedSize: 24 - PrivateSegmentFixedSize: 16 - KernargSegmentAlign: 16 - WavefrontSize: 64 - MaxFlatWorkGroupSize: 256 - NumSpilledSGPRs: 1 - NumSpilledVGPRs: 1 -.end_amd_amdgpu_hsa_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-debug-props.s +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s -// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s - -// CHECK: .amd_amdgpu_hsa_metadata -// CHECK: Version: [ 1, 0 ] -// CHECK: Kernels: -// CHECK: - Name: test_kernel -// CHECK: SymbolName: 'test_kernel@kd' -// CHECK: DebugProps: -// CHECK: DebuggerABIVersion: [ 1, 0 ] -// CHECK: ReservedNumVGPRs: 4 -// CHECK: ReservedFirstVGPR: 11 -// CHECK: PrivateSegmentBufferSGPR: 0 -// CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 -.amd_amdgpu_hsa_metadata - Version: [ 1, 0 ] - Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] - Kernels: - - Name: test_kernel - SymbolName: test_kernel@kd - DebugProps: - DebuggerABIVersion: [ 1, 0 ] - ReservedNumVGPRs: 4 - ReservedFirstVGPR: 11 - PrivateSegmentBufferSGPR: 0 - WavefrontPrivateSegmentOffsetSGPR: 11 -.end_amd_amdgpu_hsa_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-unknown-key.s b/llvm/test/MC/AMDGPU/hsa-metadata-unknown-key.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-unknown-key.s +++ /dev/null @@ -1,42 +0,0 @@ -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s - -// CHECK: error: unknown key 'UnknownKey' -.amd_amdgpu_hsa_metadata - UnknownKey: [ 2, 0 ] - Version: [ 1, 0 ] - Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] - Kernels: - - Name: test_kernel - SymbolName: test_kernel@kd - Language: OpenCL C - LanguageVersion: [ 2, 0 ] - Args: - - Size: 1 - Align: 1 - ValueKind: ByValue - ValueType: I8 - AccQual: Default - TypeName: char - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetX - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetY - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenGlobalOffsetZ - ValueType: I64 - - Size: 8 - Align: 8 - ValueKind: HiddenPrintfBuffer - ValueType: I8 - AddrSpaceQual: Global -.end_amd_amdgpu_hsa_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-text.s b/llvm/test/MC/AMDGPU/hsa-text.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-text.s +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF - -// For compatibility reasons we treat convert .text sections to .hsatext - -// ELF: Section { - -// ELF: Name: .text -// ELF: Type: SHT_PROGBITS (0x1) -// ELF: Flags [ (0x6) -// ELF: SHF_ALLOC (0x2) -// ELF: SHF_EXECINSTR (0x4) -// ELF: Size: 260 -// ELF: } - -.text -// ASM: .text - -.hsa_code_object_version 1,0 -// ASM: .hsa_code_object_version 1,0 - -.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" - -.amd_kernel_code_t -.end_amd_kernel_code_t - -s_endpgm diff --git a/llvm/test/MC/AMDGPU/hsa-wave-size.s b/llvm/test/MC/AMDGPU/hsa-wave-size.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa-wave-size.s +++ /dev/null @@ -1,65 +0,0 @@ -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s | FileCheck --check-prefixes=GCN,GFX7 %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s - -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck --check-prefix=GFX7-ERR %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W32-ERR %s -// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W64-ERR %s - -// GCN: test0: -// GFX7: enable_wavefront_size32 = 0 -// GFX7: wavefront_size = 6 -// GFX10-W32: enable_wavefront_size32 = 1 -// GFX10-W32: wavefront_size = 5 -// GFX10-W64: enable_wavefront_size32 = 0 -// GFX10-W64: wavefront_size = 6 -.amdgpu_hsa_kernel test0 -test0: -.amd_kernel_code_t -.end_amd_kernel_code_t - -// GCN: test1: -// GFX7: enable_wavefront_size32 = 0 -// GFX7: wavefront_size = 6 -// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64 -// GFX10-W64: enable_wavefront_size32 = 0 -// GFX10-W64: wavefront_size = 6 -.amdgpu_hsa_kernel test1 -test1: -.amd_kernel_code_t - enable_wavefront_size32 = 0 -.end_amd_kernel_code_t - -// GCN: test2: -// GFX7: enable_wavefront_size32 = 0 -// GFX7: wavefront_size = 6 -// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64 -// GFX10-W64: enable_wavefront_size32 = 0 -// GFX10-W64: wavefront_size = 6 -.amdgpu_hsa_kernel test2 -test2: -.amd_kernel_code_t - wavefront_size = 6 -.end_amd_kernel_code_t - -// GCN: test3: -// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+ -// GFX10-W32: enable_wavefront_size32 = 1 -// GFX10-W32: wavefront_size = 5 -// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32 -.amdgpu_hsa_kernel test3 -test3: -.amd_kernel_code_t - enable_wavefront_size32 = 1 -.end_amd_kernel_code_t - -// GCN: test4: -// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+ -// GFX10-W32: enable_wavefront_size32 = 1 -// GFX10-W32: wavefront_size = 5 -// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32 -.amdgpu_hsa_kernel test4 -test4: -.amd_kernel_code_t - wavefront_size = 5 -.end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa.s b/llvm/test/MC/AMDGPU/hsa.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa.s +++ /dev/null @@ -1,274 +0,0 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF - -// ELF: Section { -// ELF: Name: .text -// ELF: Type: SHT_PROGBITS (0x1) -// ELF: Flags [ (0x6) -// ELF: SHF_ALLOC (0x2) -// ELF: SHF_EXECINSTR (0x4) - -// ELF: SHT_NOTE -// ELF: 0000: 04000000 08000000 01000000 414D4400 -// ELF: 0010: 02000000 00000000 04000000 1B000000 -// ELF: 0020: 03000000 414D4400 04000700 07000000 -// ELF: 0030: 00000000 00000000 414D4400 414D4447 -// ELF: 0040: 50550000 -// We can't check binary representation of metadata note: it is different on -// Windows and Linux because of carriage return on Windows - -// ELF: Symbol { -// ELF: Name: amd_kernel_code_t_test_all -// ELF: Type: AMDGPU_HSA_KERNEL (0xA) -// ELF: Section: .text -// ELF: } -// ELF: Symbol { -// ELF: Name: amd_kernel_code_t_minimal -// ELF: Type: AMDGPU_HSA_KERNEL (0xA) -// ELF: Section: .text -// ELF: } - -.text -// ASM: .text - -.hsa_code_object_version 2,0 -// ASM: .hsa_code_object_version 2,0 - -.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" - -.amd_amdgpu_hsa_metadata - Version: [ 3, 0 ] - Kernels: - - Name: amd_kernel_code_t_test_all - SymbolName: amd_kernel_code_t_test_all@kd - - Name: amd_kernel_code_t_minimal - SymbolName: amd_kernel_code_t_minimal@kd -.end_amd_amdgpu_hsa_metadata - -// ASM: .amd_amdgpu_hsa_metadata -// ASM: Version: [ 3, 0 ] -// ASM: Kernels: -// ASM: - Name: amd_kernel_code_t_test_all -// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd' -// ASM: - Name: amd_kernel_code_t_minimal -// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd' -// ASM: .end_amd_amdgpu_hsa_metadata - -.amdgpu_hsa_kernel amd_kernel_code_t_test_all -.amdgpu_hsa_kernel amd_kernel_code_t_minimal - -amd_kernel_code_t_test_all: -; Test all amd_kernel_code_t members with non-default values. -.amd_kernel_code_t - kernel_code_version_major = 100 - kernel_code_version_minor = 100 - machine_kind = 0 - machine_version_major = 5 - machine_version_minor = 5 - machine_version_stepping = 5 - kernel_code_entry_byte_offset = 512 - kernel_code_prefetch_byte_size = 1 - max_scratch_backing_memory_byte_size = 1 - compute_pgm_rsrc1_vgprs = 1 - compute_pgm_rsrc1_sgprs = 1 - compute_pgm_rsrc1_priority = 1 - compute_pgm_rsrc1_float_mode = 1 - compute_pgm_rsrc1_priv = 1 - compute_pgm_rsrc1_dx10_clamp = 1 - compute_pgm_rsrc1_debug_mode = 1 - compute_pgm_rsrc1_ieee_mode = 1 - compute_pgm_rsrc2_scratch_en = 1 - compute_pgm_rsrc2_user_sgpr = 1 - compute_pgm_rsrc2_tgid_x_en = 1 - compute_pgm_rsrc2_tgid_y_en = 1 - compute_pgm_rsrc2_tgid_z_en = 1 - compute_pgm_rsrc2_tg_size_en = 1 - compute_pgm_rsrc2_tidig_comp_cnt = 1 - compute_pgm_rsrc2_excp_en_msb = 1 - compute_pgm_rsrc2_lds_size = 1 - compute_pgm_rsrc2_excp_en = 1 - enable_sgpr_private_segment_buffer = 1 - enable_sgpr_dispatch_ptr = 1 - enable_sgpr_queue_ptr = 1 - enable_sgpr_kernarg_segment_ptr = 1 - enable_sgpr_dispatch_id = 1 - enable_sgpr_flat_scratch_init = 1 - enable_sgpr_private_segment_size = 1 - enable_sgpr_grid_workgroup_count_x = 1 - enable_sgpr_grid_workgroup_count_y = 1 - enable_sgpr_grid_workgroup_count_z = 1 - enable_ordered_append_gds = 1 - private_element_size = 1 - is_ptr64 = 1 - is_dynamic_callstack = 1 - is_debug_enabled = 1 - is_xnack_enabled = 1 - workitem_private_segment_byte_size = 1 - workgroup_group_segment_byte_size = 1 - gds_segment_byte_size = 1 - kernarg_segment_byte_size = 1 - workgroup_fbarrier_count = 1 - wavefront_sgpr_count = 1 - workitem_vgpr_count = 1 - reserved_vgpr_first = 1 - reserved_vgpr_count = 1 - reserved_sgpr_first = 1 - reserved_sgpr_count = 1 - debug_wavefront_private_segment_offset_sgpr = 1 - debug_private_segment_buffer_sgpr = 1 - kernarg_segment_alignment = 5 - group_segment_alignment = 5 - private_segment_alignment = 5 - wavefront_size = 6 - call_convention = 1 - runtime_loader_kernel_symbol = 1 -.end_amd_kernel_code_t - -// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: -// ASM: .amd_kernel_code_t -// ASM: amd_code_version_major = 100 -// ASM: amd_code_version_minor = 100 -// ASM: amd_machine_kind = 0 -// ASM: amd_machine_version_major = 5 -// ASM: amd_machine_version_minor = 5 -// ASM: amd_machine_version_stepping = 5 -// ASM: kernel_code_entry_byte_offset = 512 -// ASM: kernel_code_prefetch_byte_size = 1 -// ASM: granulated_workitem_vgpr_count = 1 -// ASM: granulated_wavefront_sgpr_count = 1 -// ASM: priority = 1 -// ASM: float_mode = 1 -// ASM: priv = 1 -// ASM: enable_dx10_clamp = 1 -// ASM: debug_mode = 1 -// ASM: enable_ieee_mode = 1 -// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 -// ASM: user_sgpr_count = 1 -// ASM: enable_sgpr_workgroup_id_x = 1 -// ASM: enable_sgpr_workgroup_id_y = 1 -// ASM: enable_sgpr_workgroup_id_z = 1 -// ASM: enable_sgpr_workgroup_info = 1 -// ASM: enable_vgpr_workitem_id = 1 -// ASM: enable_exception_msb = 1 -// ASM: granulated_lds_size = 1 -// ASM: enable_exception = 1 -// ASM: enable_sgpr_private_segment_buffer = 1 -// ASM: enable_sgpr_dispatch_ptr = 1 -// ASM: enable_sgpr_queue_ptr = 1 -// ASM: enable_sgpr_kernarg_segment_ptr = 1 -// ASM: enable_sgpr_dispatch_id = 1 -// ASM: enable_sgpr_flat_scratch_init = 1 -// ASM: enable_sgpr_private_segment_size = 1 -// ASM: enable_sgpr_grid_workgroup_count_x = 1 -// ASM: enable_sgpr_grid_workgroup_count_y = 1 -// ASM: enable_sgpr_grid_workgroup_count_z = 1 -// ASM: enable_ordered_append_gds = 1 -// ASM: private_element_size = 1 -// ASM: is_ptr64 = 1 -// ASM: is_dynamic_callstack = 1 -// ASM: is_debug_enabled = 1 -// ASM: is_xnack_enabled = 1 -// ASM: workitem_private_segment_byte_size = 1 -// ASM: workgroup_group_segment_byte_size = 1 -// ASM: gds_segment_byte_size = 1 -// ASM: kernarg_segment_byte_size = 1 -// ASM: workgroup_fbarrier_count = 1 -// ASM: wavefront_sgpr_count = 1 -// ASM: workitem_vgpr_count = 1 -// ASM: reserved_vgpr_first = 1 -// ASM: reserved_vgpr_count = 1 -// ASM: reserved_sgpr_first = 1 -// ASM: reserved_sgpr_count = 1 -// ASM: debug_wavefront_private_segment_offset_sgpr = 1 -// ASM: debug_private_segment_buffer_sgpr = 1 -// ASM: kernarg_segment_alignment = 5 -// ASM: group_segment_alignment = 5 -// ASM: private_segment_alignment = 5 -// ASM: wavefront_size = 6 -// ASM: call_convention = 1 -// ASM: runtime_loader_kernel_symbol = 1 -// ASM: .end_amd_kernel_code_t - -amd_kernel_code_t_minimal: -.amd_kernel_code_t - enable_sgpr_kernarg_segment_ptr = 1 - is_ptr64 = 1 - granulated_workitem_vgpr_count = 1 - granulated_wavefront_sgpr_count = 1 - user_sgpr_count = 2 - kernarg_segment_byte_size = 16 - wavefront_sgpr_count = 8 -// wavefront_sgpr_count = 7 -; wavefront_sgpr_count = 7 -// Make sure a blank line won't break anything: - -// Make sure a line with whitespace won't break anything: - - workitem_vgpr_count = 16 -.end_amd_kernel_code_t - -// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: -// ASM: .amd_kernel_code_t -// ASM: amd_code_version_major = 1 -// ASM: amd_code_version_minor = 2 -// ASM: amd_machine_kind = 1 -// ASM: amd_machine_version_major = 7 -// ASM: amd_machine_version_minor = 0 -// ASM: amd_machine_version_stepping = 0 -// ASM: kernel_code_entry_byte_offset = 256 -// ASM: kernel_code_prefetch_byte_size = 0 -// ASM: granulated_workitem_vgpr_count = 1 -// ASM: granulated_wavefront_sgpr_count = 1 -// ASM: priority = 0 -// ASM: float_mode = 0 -// ASM: priv = 0 -// ASM: enable_dx10_clamp = 0 -// ASM: debug_mode = 0 -// ASM: enable_ieee_mode = 0 -// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 -// ASM: user_sgpr_count = 2 -// ASM: enable_sgpr_workgroup_id_x = 0 -// ASM: enable_sgpr_workgroup_id_y = 0 -// ASM: enable_sgpr_workgroup_id_z = 0 -// ASM: enable_sgpr_workgroup_info = 0 -// ASM: enable_vgpr_workitem_id = 0 -// ASM: enable_exception_msb = 0 -// ASM: granulated_lds_size = 0 -// ASM: enable_exception = 0 -// ASM: enable_sgpr_private_segment_buffer = 0 -// ASM: enable_sgpr_dispatch_ptr = 0 -// ASM: enable_sgpr_queue_ptr = 0 -// ASM: enable_sgpr_kernarg_segment_ptr = 1 -// ASM: enable_sgpr_dispatch_id = 0 -// ASM: enable_sgpr_flat_scratch_init = 0 -// ASM: enable_sgpr_private_segment_size = 0 -// ASM: enable_sgpr_grid_workgroup_count_x = 0 -// ASM: enable_sgpr_grid_workgroup_count_y = 0 -// ASM: enable_sgpr_grid_workgroup_count_z = 0 -// ASM: enable_ordered_append_gds = 0 -// ASM: private_element_size = 0 -// ASM: is_ptr64 = 1 -// ASM: is_dynamic_callstack = 0 -// ASM: is_debug_enabled = 0 -// ASM: is_xnack_enabled = 0 -// ASM: workitem_private_segment_byte_size = 0 -// ASM: workgroup_group_segment_byte_size = 0 -// ASM: gds_segment_byte_size = 0 -// ASM: kernarg_segment_byte_size = 16 -// ASM: workgroup_fbarrier_count = 0 -// ASM: wavefront_sgpr_count = 8 -// ASM: workitem_vgpr_count = 16 -// ASM: reserved_vgpr_first = 0 -// ASM: reserved_vgpr_count = 0 -// ASM: reserved_sgpr_first = 0 -// ASM: reserved_sgpr_count = 0 -// ASM: debug_wavefront_private_segment_offset_sgpr = 0 -// ASM: debug_private_segment_buffer_sgpr = 0 -// ASM: kernarg_segment_alignment = 4 -// ASM: group_segment_alignment = 4 -// ASM: private_segment_alignment = 4 -// ASM: wavefront_size = 6 -// ASM: call_convention = -1 -// ASM: runtime_loader_kernel_symbol = 0 -// ASM: .end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa_code_object_isa_args.s b/llvm/test/MC/AMDGPU/hsa_code_object_isa_args.s deleted file mode 100644 --- a/llvm/test/MC/AMDGPU/hsa_code_object_isa_args.s +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_700 -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_803 -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_810 -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_700 -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_803 -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_810 - -// ELF: SHT_NOTE -// ELF: 0000: 04000000 08000000 01000000 414D4400 -// ELF: 0010: 01000000 00000000 04000000 1B000000 -// ELF_700: 0020: 03000000 414D4400 04000700 07000000 -// ELF_700: 0030: 00000000 00000000 414D4400 414D4447 -// ELF_803: 0020: 03000000 414D4400 04000700 08000000 -// ELF_803: 0030: 00000000 03000000 414D4400 414D4447 -// ELF_810: 0020: 03000000 414D4400 04000700 08000000 -// ELF_810: 0030: 01000000 00000000 414D4400 414D4447 -// ELF: 0040: 50550000 - -.hsa_code_object_version 1,0 -// ASM: .hsa_code_object_version 1,0 - -// Test defaults -.hsa_code_object_isa -// ASM_700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -// ASM_803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" -// ASM_810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" - -// Test expressions and symbols -.set A,2 -.hsa_code_object_isa A+1,A*2,A/A+4,"AMD","AMDGPU" -// ASM: .hsa_code_object_isa 3,4,5,"AMD","AMDGPU" diff --git a/llvm/test/MC/AMDGPU/isa-version-hsa.s b/llvm/test/MC/AMDGPU/isa-version-hsa.s --- a/llvm/test/MC/AMDGPU/isa-version-hsa.s +++ b/llvm/test/MC/AMDGPU/isa-version-hsa.s @@ -1,13 +1,13 @@ // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-HSA %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s | FileCheck --check-prefix=OSABI-HSA %s -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-HSA %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s | FileCheck --check-prefix=OSABI-HSA %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s -// OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802" -// OSABI-UNK-ERR: error: target id must match options -// OSABI-HSA-ERR: error: target id must match options -// OSABI-PAL-ERR: error: target id must match options -.amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802" +// OSABI-HSA: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" +// OSABI-UNK-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-unknown--gfx802 +// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx803 +// OSABI-PAL-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdpal--gfx802 +.amdgcn_target "amdgcn-amd-amdhsa--gfx802" diff --git a/llvm/test/MC/AMDGPU/isa-version-pal.s b/llvm/test/MC/AMDGPU/isa-version-pal.s --- a/llvm/test/MC/AMDGPU/isa-version-pal.s +++ b/llvm/test/MC/AMDGPU/isa-version-pal.s @@ -1,13 +1,14 @@ // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s // RUN: llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-PAL %s // RUN: llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s | FileCheck --check-prefix=OSABI-PAL %s // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s // OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" // OSABI-UNK-ERR: error: target id must match options -// OSABI-HSA-ERR: error: target id must match options +// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdpal--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802 // OSABI-PAL-ERR: error: target id must match options .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" +.amdgcn_target "amdgcn-amd-amdpal--gfx802" diff --git a/llvm/test/MC/AMDGPU/isa-version-unk.s b/llvm/test/MC/AMDGPU/isa-version-unk.s --- a/llvm/test/MC/AMDGPU/isa-version-unk.s +++ b/llvm/test/MC/AMDGPU/isa-version-unk.s @@ -1,13 +1,14 @@ // RUN: llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-UNK %s // RUN: llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s | FileCheck --check-prefix=OSABI-UNK %s // RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s -// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" // OSABI-UNK-ERR: error: target id must match options -// OSABI-HSA-ERR: error: target id must match options +// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-unknown--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802 // OSABI-PAL-ERR: error: target id must match options .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" +.amdgcn_target "amdgcn-amd-unknown--gfx802"