Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -check-prefix=NONAMDHSA // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -2106,6 +2106,9 @@ Features* must be included in the list if they are enabled even if that is the default for *Processor*. +Caution: + The AMD HSA OS is not supported on Southern Islands (GFX6) ASICs. + For example: ``"amdgcn-amd-amdhsa--gfx902+xnack"`` Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -59,6 +59,17 @@ cl::desc("Enable the use of AA during codegen."), cl::init(true)); +static AMDGPUSubtarget::Generation initializeGen(const Triple &TT, + StringRef GPU) { + if (GPU.contains("generic")) { + return TT.getOS() == Triple::AMDHSA + ? 
AMDGPUSubtarget::Generation::SEA_ISLANDS + : AMDGPUSubtarget::Generation::SOUTHERN_ISLANDS; + } else { + return AMDGPUSubtarget::Generation::SOUTHERN_ISLANDS; + } +} + GCNSubtarget::~GCNSubtarget() = default; R600Subtarget & @@ -118,6 +129,12 @@ FlatForGlobal = true; } + // Bail out of compilation: GFX6 does not support the AMD HSA OS type. + if (isAmdHsaOS() && getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + report_fatal_error("GFX6 (SI) ASICs does not support AMD HSA OS type \n", + false); + } + // Set defaults if needed. if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; @@ -178,114 +195,59 @@ { } GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM) : - AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), - AMDGPUSubtarget(TT), - TargetTriple(TT), - Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS), - InstrItins(getInstrItineraryForCPU(GPU)), - LDSBankCount(0), - MaxPrivateElementSize(0), - - FastFMAF32(false), - FastDenormalF32(false), - HalfRate64Ops(false), - - FlatForGlobal(false), - AutoWaitcntBeforeBarrier(false), - UnalignedScratchAccess(false), - UnalignedAccessMode(false), - - HasApertureRegs(false), - EnableXNACK(false), - DoesNotSupportXNACK(false), - EnableCuMode(false), - TrapHandler(false), - - EnableLoadStoreOpt(false), - EnableUnsafeDSOffsetFolding(false), - EnableSIScheduler(false), - EnableDS128(false), - EnablePRTStrictNull(false), - DumpCode(false), - - FP64(false), - GCN3Encoding(false), - CIInsts(false), - GFX8Insts(false), - GFX9Insts(false), - GFX10Insts(false), - GFX10_3Insts(false), - GFX7GFX8GFX9Insts(false), - SGPRInitBug(false), - HasSMemRealTime(false), - HasIntClamp(false), - HasFmaMixInsts(false), - HasMovrel(false), - HasVGPRIndexMode(false), - HasScalarStores(false), - HasScalarAtomics(false), - HasSDWAOmod(false), - HasSDWAScalar(false), - HasSDWASdst(false), - HasSDWAMac(false), - HasSDWAOutModsVOPC(false), - HasDPP(false), - HasDPP8(false), - 
HasR128A16(false), - HasGFX10A16(false), - HasG16(false), - HasNSAEncoding(false), - GFX10_BEncoding(false), - HasDLInsts(false), - HasDot1Insts(false), - HasDot2Insts(false), - HasDot3Insts(false), - HasDot4Insts(false), - HasDot5Insts(false), - HasDot6Insts(false), - HasMAIInsts(false), - HasPkFmacF16Inst(false), - HasAtomicFaddInsts(false), - EnableSRAMECC(false), - DoesNotSupportSRAMECC(false), - HasNoSdstCMPX(false), - HasVscnt(false), - HasGetWaveIdInst(false), - HasSMemTimeInst(false), - HasRegisterBanking(false), - HasVOP3Literal(false), - HasNoDataDepHazard(false), - FlatAddressSpace(false), - FlatInstOffsets(false), - FlatGlobalInsts(false), - FlatScratchInsts(false), - ScalarFlatScratchInsts(false), - AddNoCarryInsts(false), - HasUnpackedD16VMem(false), - LDSMisalignedBug(false), - HasMFMAInlineLiteralBug(false), - UnalignedBufferAccess(false), - UnalignedDSAccess(false), - - ScalarizeGlobal(false), - - HasVcmpxPermlaneHazard(false), - HasVMEMtoScalarWriteHazard(false), - HasSMEMtoVectorWriteHazard(false), - HasInstFwdPrefetchBug(false), - HasVcmpxExecWARHazard(false), - HasLdsBranchVmemWARHazard(false), - HasNSAtoVMEMBug(false), - HasOffset3fBug(false), - HasFlatSegmentOffsetBug(false), - HasImageStoreD16Bug(false), - HasImageGather4D16Bug(false), - - FeatureDisable(false), - InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), - TLInfo(TM, *this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { + const GCNTargetMachine &TM) + : AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), + TargetTriple(TT), Gen(initializeGen(TT, GPU)), + InstrItins(getInstrItineraryForCPU(GPU)), LDSBankCount(0), + MaxPrivateElementSize(0), + + FastFMAF32(false), FastDenormalF32(false), HalfRate64Ops(false), + + FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), + UnalignedScratchAccess(false), UnalignedAccessMode(false), + + HasApertureRegs(false), EnableXNACK(false), DoesNotSupportXNACK(false), + EnableCuMode(false), 
TrapHandler(false), + + EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), + EnableSIScheduler(false), EnableDS128(false), EnablePRTStrictNull(false), + DumpCode(false), + + FP64(false), GCN3Encoding(false), CIInsts(false), GFX8Insts(false), + GFX9Insts(false), GFX10Insts(false), GFX10_3Insts(false), + GFX7GFX8GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), + HasIntClamp(false), HasFmaMixInsts(false), HasMovrel(false), + HasVGPRIndexMode(false), HasScalarStores(false), HasScalarAtomics(false), + HasSDWAOmod(false), HasSDWAScalar(false), HasSDWASdst(false), + HasSDWAMac(false), HasSDWAOutModsVOPC(false), HasDPP(false), + HasDPP8(false), HasR128A16(false), HasGFX10A16(false), HasG16(false), + HasNSAEncoding(false), GFX10_BEncoding(false), HasDLInsts(false), + HasDot1Insts(false), HasDot2Insts(false), HasDot3Insts(false), + HasDot4Insts(false), HasDot5Insts(false), HasDot6Insts(false), + HasMAIInsts(false), HasPkFmacF16Inst(false), HasAtomicFaddInsts(false), + EnableSRAMECC(false), DoesNotSupportSRAMECC(false), HasNoSdstCMPX(false), + HasVscnt(false), HasGetWaveIdInst(false), HasSMemTimeInst(false), + HasRegisterBanking(false), HasVOP3Literal(false), + HasNoDataDepHazard(false), FlatAddressSpace(false), + FlatInstOffsets(false), FlatGlobalInsts(false), FlatScratchInsts(false), + ScalarFlatScratchInsts(false), AddNoCarryInsts(false), + HasUnpackedD16VMem(false), LDSMisalignedBug(false), + HasMFMAInlineLiteralBug(false), UnalignedBufferAccess(false), + UnalignedDSAccess(false), + + ScalarizeGlobal(false), + + HasVcmpxPermlaneHazard(false), HasVMEMtoScalarWriteHazard(false), + HasSMEMtoVectorWriteHazard(false), HasInstFwdPrefetchBug(false), + HasVcmpxExecWARHazard(false), HasLdsBranchVmemWARHazard(false), + HasNSAtoVMEMBug(false), HasOffset3fBug(false), + HasFlatSegmentOffsetBug(false), HasImageStoreD16Bug(false), + HasImageGather4D16Bug(false), + + FeatureDisable(false), + InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), + TLInfo(TM, 
*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering())); Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll =================================================================== --- llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll +++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown- -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s ; Make sure nothing crashes on targets with or without AGPRs Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd- -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck 
-check-prefix=WAVE32 %s Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd- -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd- -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" 
-run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -1,11 +1,3 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=GFX602 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX700 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=GFX700 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=GFX701 %s @@ -44,9 +36,6 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s -; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" -; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" -; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" ; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" ; GFX702: 
.amdgcn_target "amdgcn-amd-amdhsa--gfx702" Index: llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll +++ llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll @@ -1,4 +1,3 @@ -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -o - %s | FileCheck -check-prefix=HSA-DEFAULT %s Index: llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; FIXME: Manually added checks for metadata nodes at bottom +; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s + +target datalayout = "A5" + +define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #0 { +; MESA-LABEL: @kern_lds_ptr_si( +; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 +; MESA-NEXT: ret void +; + store i32 0, i32 addrspace(3)* %lds, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="tahiti" } Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -530,24 +530,6 @@ ret void } -define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { -; HSA-LABEL: @kern_lds_ptr_si( -; HSA-NEXT: 
[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* -; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 -; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 -; HSA-NEXT: ret void -; -; MESA-LABEL: @kern_lds_ptr_si( -; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 -; MESA-NEXT: ret void -; - store i32 0, i32 addrspace(3)* %lds, align 4 - ret void -} - define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_i8( ; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() @@ -1914,7 +1896,6 @@ attributes #0 = { nounwind "target-cpu"="kaveri" } attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" } -attributes #2 = { nounwind "target-cpu"="tahiti" } ; GCN: 0 = !{} ; GCN: !1 = !{i64 42}